1# Copyright (C) 2008-2012, 2016 Canonical Ltd
2#
3# This program is free software; you can redistribute it and/or modify
4# it under the terms of the GNU General Public License as published by
5# the Free Software Foundation; either version 2 of the License, or
6# (at your option) any later version.
7#
8# This program is distributed in the hope that it will be useful,
9# but WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11# GNU General Public License for more details.
12#
13# You should have received a copy of the GNU General Public License
14# along with this program; if not, write to the Free Software
15# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16#
17
18"""Tests for btree indices."""
19
20import pprint
21import zlib
22
23from ... import (
24    errors,
25    fifo_cache,
26    lru_cache,
27    osutils,
28    tests,
29    transport,
30    )
31from .. import (
32    btree_index,
33    index as _mod_index,
34    )
35from ...tests import (
36    TestCaseWithTransport,
37    scenarios,
38    )
39from ...tests import (
40    features,
41    )
42
43
# Test-loader hook: multiply each test by the scenarios its class declares
# (e.g. the python vs. C btree parser scenarios below).
load_tests = scenarios.load_tests_apply_scenarios
45
46
def btreeparser_scenarios():
    """Return test scenarios, one per available btree serializer.

    Always includes the pure-Python serializer; adds the compiled
    extension module when it is importable.
    """
    import breezy.bzr._btree_serializer_py as py_module
    # Named ``parser_scenarios`` so it does not shadow the ``scenarios``
    # module imported at the top of the file (used by ``load_tests``).
    parser_scenarios = [('python', {'parse_btree': py_module})]
    if compiled_btreeparser_feature.available():
        parser_scenarios.append(
            ('C', {'parse_btree': compiled_btreeparser_feature.module}))
    return parser_scenarios
54
55
# Feature probe: true when the compiled (extension) btree serializer can be
# imported; gates the 'C' scenario in btreeparser_scenarios above.
compiled_btreeparser_feature = features.ModuleAvailableFeature(
    'breezy.bzr._btree_serializer_pyx')
58
59
class BTreeTestCase(TestCaseWithTransport):
    # Test names in subclasses are suffixed by the key length and reference
    # list count that they exercise.

    def setUp(self):
        super(BTreeTestCase, self).setUp()
        # Small reserved-header size keeps multi-page fixtures cheap to build.
        self.overrideAttr(btree_index, '_RESERVED_HEADER_BYTES', 100)

    def make_nodes(self, count, key_elements, reference_lists):
        """Generate count*key_elements sample nodes."""
        def _pos_to_key(pos, lead=b""):
            # One key element: the decimal digits of pos repeated 40 times,
            # optionally preceded by a lead marker such as b"ref".
            return (lead + (b"%d" % pos) * 40,)
        keys = []
        for prefix_pos in range(key_elements):
            # Single-element keys carry no prefix; wider keys are prefixed
            # with an element derived from prefix_pos.
            prefix = () if key_elements == 1 else _pos_to_key(prefix_pos)
            for pos in range(count):
                # TODO: This creates odd keys. When count == 100,000, it
                #       creates a 240 byte key
                key = prefix + _pos_to_key(pos)
                value = b"value:%d" % pos
                # Generate some references: as many keys in each list as its
                # index + the key depth mod 2 - this produces both 0 length
                # lists and ones slightly longer than the number of lists.
                # It also ensures we have non homogeneous lists.
                refs = []
                for list_pos in range(reference_lists):
                    if pos % 2:
                        # Odd positions refer to a nearby key, repeated.
                        ref_list = [prefix + _pos_to_key(pos - 1, b"ref")
                                    for _ in range(list_pos + 1)]
                    else:
                        # Even positions enumerate serials within the list.
                        ref_list = [prefix + _pos_to_key(ref_pos, b"ref")
                                    for ref_pos in range(list_pos)]
                    refs.append(tuple(ref_list))
                keys.append((key, value, tuple(refs)))
        return keys

    def shrink_page_size(self):
        """Shrink the default page size so that less fits in a page."""
        self.overrideAttr(btree_index, '_PAGE_SIZE')
        btree_index._PAGE_SIZE = 2048

    def assertEqualApproxCompressed(self, expected, actual, slop=6):
        """Check a count of compressed bytes is approximately as expected

        Relying on compressed length being stable even with fixed inputs is
        slightly bogus, but zlib is stable enough that this mostly works.
        """
        if abs(actual - expected) >= slop:
            self.fail("Expected around %d bytes compressed but got %d" %
                      (expected, actual))
122
123
class TestBTreeBuilder(BTreeTestCase):
    """Tests for BTreeBuilder: on-disk serialisation and spill-to-disk
    behaviour while building indices of various key widths and reference
    list counts."""

    def test_clear_cache(self):
        builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
        # This is a no-op, but we need the api to be consistent with other
        # BTreeGraphIndex apis.
        builder.clear_cache()

    def test_empty_1_0(self):
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
        # NamedTemporaryFile dies on builder.finish().read(). weird.
        temp_file = builder.finish()
        content = temp_file.read()
        del temp_file
        self.assertEqual(
            b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=0\n"
            b"row_lengths=\n",
            content)

    def test_empty_2_1(self):
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=1)
        # NamedTemporaryFile dies on builder.finish().read(). weird.
        temp_file = builder.finish()
        content = temp_file.read()
        del temp_file
        self.assertEqual(
            b"B+Tree Graph Index 2\nnode_ref_lists=1\nkey_elements=2\nlen=0\n"
            b"row_lengths=\n",
            content)

    def test_root_leaf_1_0(self):
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
        nodes = self.make_nodes(5, 1, 0)
        for node in nodes:
            builder.add_node(*node)
        # NamedTemporaryFile dies on builder.finish().read(). weird.
        temp_file = builder.finish()
        content = temp_file.read()
        del temp_file
        self.assertEqual(131, len(content))
        self.assertEqual(
            b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=5\n"
            b"row_lengths=1\n",
            content[:73])
        node_content = content[73:]
        node_bytes = zlib.decompress(node_content)
        expected_node = (b"type=leaf\n"
                         b"0000000000000000000000000000000000000000\x00\x00value:0\n"
                         b"1111111111111111111111111111111111111111\x00\x00value:1\n"
                         b"2222222222222222222222222222222222222222\x00\x00value:2\n"
                         b"3333333333333333333333333333333333333333\x00\x00value:3\n"
                         b"4444444444444444444444444444444444444444\x00\x00value:4\n")
        self.assertEqual(expected_node, node_bytes)

    def test_root_leaf_2_2(self):
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
        nodes = self.make_nodes(5, 2, 2)
        for node in nodes:
            builder.add_node(*node)
        # NamedTemporaryFile dies on builder.finish().read(). weird.
        temp_file = builder.finish()
        content = temp_file.read()
        del temp_file
        self.assertEqual(238, len(content))
        self.assertEqual(
            b"B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=10\n"
            b"row_lengths=1\n",
            content[:74])
        node_content = content[74:]
        node_bytes = zlib.decompress(node_content)
        expected_node = (
            b"type=leaf\n"
            b"0000000000000000000000000000000000000000\x000000000000000000000000000000000000000000\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:0\n"
            b"0000000000000000000000000000000000000000\x001111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\r0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:1\n"
            b"0000000000000000000000000000000000000000\x002222222222222222222222222222222222222222\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:2\n"
            b"0000000000000000000000000000000000000000\x003333333333333333333333333333333333333333\x000000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\t0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\r0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\x00value:3\n"
            b"0000000000000000000000000000000000000000\x004444444444444444444444444444444444444444\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:4\n"
            b"1111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:0\n"
            b"1111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\r1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:1\n"
            b"1111111111111111111111111111111111111111\x002222222222222222222222222222222222222222\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:2\n"
            b"1111111111111111111111111111111111111111\x003333333333333333333333333333333333333333\x001111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\t1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\r1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\x00value:3\n"
            b"1111111111111111111111111111111111111111\x004444444444444444444444444444444444444444\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:4\n"
            b""
            )
        self.assertEqual(expected_node, node_bytes)

    def test_2_leaves_1_0(self):
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
        nodes = self.make_nodes(400, 1, 0)
        for node in nodes:
            builder.add_node(*node)
        # NamedTemporaryFile dies on builder.finish().read(). weird.
        temp_file = builder.finish()
        content = temp_file.read()
        del temp_file
        self.assertEqualApproxCompressed(9283, len(content))
        self.assertEqual(
            b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
            b"row_lengths=1,2\n",
            content[:77])
        root = content[77:4096]
        leaf1 = content[4096:8192]
        leaf2 = content[8192:]
        root_bytes = zlib.decompress(root)
        expected_root = (
            b"type=internal\n"
            b"offset=0\n"
            ) + (b"307" * 40) + b"\n"
        self.assertEqual(expected_root, root_bytes)
        # We already know serialisation works for leaves, check key selection:
        leaf1_bytes = zlib.decompress(leaf1)
        sorted_node_keys = sorted(node[0] for node in nodes)
        node = btree_index._LeafNode(leaf1_bytes, 1, 0)
        self.assertEqual(231, len(node))
        self.assertEqual(sorted_node_keys[:231], node.all_keys())
        leaf2_bytes = zlib.decompress(leaf2)
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
        self.assertEqual(400 - 231, len(node))
        self.assertEqual(sorted_node_keys[231:], node.all_keys())

    def test_last_page_rounded_1_layer(self):
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
        nodes = self.make_nodes(10, 1, 0)
        for node in nodes:
            builder.add_node(*node)
        # NamedTemporaryFile dies on builder.finish().read(). weird.
        temp_file = builder.finish()
        content = temp_file.read()
        del temp_file
        self.assertEqualApproxCompressed(155, len(content))
        self.assertEqual(
            b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=10\n"
            b"row_lengths=1\n",
            content[:74])
        # Check the last page is well formed
        leaf2 = content[74:]
        leaf2_bytes = zlib.decompress(leaf2)
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
        self.assertEqual(10, len(node))
        sorted_node_keys = sorted(node[0] for node in nodes)
        self.assertEqual(sorted_node_keys, node.all_keys())

    def test_last_page_not_rounded_2_layer(self):
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
        nodes = self.make_nodes(400, 1, 0)
        for node in nodes:
            builder.add_node(*node)
        # NamedTemporaryFile dies on builder.finish().read(). weird.
        temp_file = builder.finish()
        content = temp_file.read()
        del temp_file
        self.assertEqualApproxCompressed(9283, len(content))
        self.assertEqual(
            b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
            b"row_lengths=1,2\n",
            content[:77])
        # Check the last page is well formed
        leaf2 = content[8192:]
        leaf2_bytes = zlib.decompress(leaf2)
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
        self.assertEqual(400 - 231, len(node))
        sorted_node_keys = sorted(node[0] for node in nodes)
        self.assertEqual(sorted_node_keys[231:], node.all_keys())

    def test_three_level_tree_details(self):
        # The left most pointer in the second internal node in a row should
        # pointer to the second node that the internal node is for, _not_
        # the first, otherwise the first node overlaps with the last node of
        # the prior internal node on that row.
        self.shrink_page_size()
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
        # 40K nodes is enough to create a two internal nodes on the second
        # level, with a 2K page size
        nodes = self.make_nodes(20000, 2, 2)

        for node in nodes:
            builder.add_node(*node)
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
        size = t.put_file('index', self.time(builder.finish))
        del builder
        index = btree_index.BTreeGraphIndex(t, 'index', size)
        # Seed the metadata, we're using internal calls now.
        index.key_count()
        self.assertEqual(3, len(index._row_lengths),
                         "Not enough rows: %r" % index._row_lengths)
        self.assertEqual(4, len(index._row_offsets))
        self.assertEqual(sum(index._row_lengths), index._row_offsets[-1])
        internal_nodes = index._get_internal_nodes([0, 1, 2])
        root_node = internal_nodes[0]
        internal_node1 = internal_nodes[1]
        internal_node2 = internal_nodes[2]
        # The left most node node2 points at should be one after the right most
        # node pointed at by node1.
        self.assertEqual(internal_node2.offset, 1 + len(internal_node1.keys))
        # The left most key of the second node pointed at by internal_node2
        # should be its first key. We can check this by looking for its first key
        # in the second node it points at
        pos = index._row_offsets[2] + internal_node2.offset + 1
        leaf = index._get_leaf_nodes([pos])[pos]
        self.assertTrue(internal_node2.keys[0] in leaf)

    def test_2_leaves_2_2(self):
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
        nodes = self.make_nodes(100, 2, 2)
        for node in nodes:
            builder.add_node(*node)
        # NamedTemporaryFile dies on builder.finish().read(). weird.
        temp_file = builder.finish()
        content = temp_file.read()
        del temp_file
        self.assertEqualApproxCompressed(12643, len(content))
        self.assertEqual(
            b"B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=200\n"
            b"row_lengths=1,3\n",
            content[:77])
        root = content[77:4096]
        leaf1 = content[4096:8192]
        leaf2 = content[8192:12288]
        leaf3 = content[12288:]
        root_bytes = zlib.decompress(root)
        expected_root = (
            b"type=internal\n"
            b"offset=0\n" +
            (b"0" * 40) + b"\x00" + (b"91" * 40) + b"\n" +
            (b"1" * 40) + b"\x00" + (b"81" * 40) + b"\n"
            )
        self.assertEqual(expected_root, root_bytes)
        # We assume the other leaf nodes have been written correctly - layering
        # FTW.

    def test_spill_index_stress_1_1(self):
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
        builder.add_node(*nodes[0])
        # Test the parts of the index that take up memory are doing so
        # predictably.
        self.assertEqual(1, len(builder._nodes))
        self.assertIs(None, builder._nodes_by_key)
        builder.add_node(*nodes[1])
        self.assertEqual(0, len(builder._nodes))
        self.assertIs(None, builder._nodes_by_key)
        self.assertEqual(1, len(builder._backing_indices))
        self.assertEqual(2, builder._backing_indices[0].key_count())
        # now back to memory
        builder.add_node(*nodes[2])
        self.assertEqual(1, len(builder._nodes))
        self.assertIs(None, builder._nodes_by_key)
        # And spills to a second backing index combing all
        builder.add_node(*nodes[3])
        self.assertEqual(0, len(builder._nodes))
        self.assertIs(None, builder._nodes_by_key)
        self.assertEqual(2, len(builder._backing_indices))
        self.assertEqual(None, builder._backing_indices[0])
        self.assertEqual(4, builder._backing_indices[1].key_count())
        # The next spills to the 2-len slot
        builder.add_node(*nodes[4])
        builder.add_node(*nodes[5])
        self.assertEqual(0, len(builder._nodes))
        self.assertIs(None, builder._nodes_by_key)
        self.assertEqual(2, len(builder._backing_indices))
        self.assertEqual(2, builder._backing_indices[0].key_count())
        self.assertEqual(4, builder._backing_indices[1].key_count())
        # Next spill combines
        builder.add_node(*nodes[6])
        builder.add_node(*nodes[7])
        self.assertEqual(3, len(builder._backing_indices))
        self.assertEqual(None, builder._backing_indices[0])
        self.assertEqual(None, builder._backing_indices[1])
        self.assertEqual(8, builder._backing_indices[2].key_count())
        # And so forth - counting up in binary.
        builder.add_node(*nodes[8])
        builder.add_node(*nodes[9])
        self.assertEqual(3, len(builder._backing_indices))
        self.assertEqual(2, builder._backing_indices[0].key_count())
        self.assertEqual(None, builder._backing_indices[1])
        self.assertEqual(8, builder._backing_indices[2].key_count())
        builder.add_node(*nodes[10])
        builder.add_node(*nodes[11])
        self.assertEqual(3, len(builder._backing_indices))
        self.assertEqual(None, builder._backing_indices[0])
        self.assertEqual(4, builder._backing_indices[1].key_count())
        self.assertEqual(8, builder._backing_indices[2].key_count())
        builder.add_node(*nodes[12])
        # Test that memory and disk are both used for query methods; and that
        # None is skipped over happily.
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
                         list(builder.iter_all_entries()))
        # Two nodes - one memory one disk
        self.assertEqual({(builder,) + node for node in nodes[11:13]},
                         set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
        self.assertEqual(13, builder.key_count())
        self.assertEqual({(builder,) + node for node in nodes[11:13]},
                         set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
        builder.add_node(*nodes[13])
        self.assertEqual(3, len(builder._backing_indices))
        self.assertEqual(2, builder._backing_indices[0].key_count())
        self.assertEqual(4, builder._backing_indices[1].key_count())
        self.assertEqual(8, builder._backing_indices[2].key_count())
        builder.add_node(*nodes[14])
        builder.add_node(*nodes[15])
        self.assertEqual(4, len(builder._backing_indices))
        self.assertEqual(None, builder._backing_indices[0])
        self.assertEqual(None, builder._backing_indices[1])
        self.assertEqual(None, builder._backing_indices[2])
        self.assertEqual(16, builder._backing_indices[3].key_count())
        # Now finish, and check we got a correctly ordered tree
        t = self.get_transport('')
        size = t.put_file('index', builder.finish())
        index = btree_index.BTreeGraphIndex(t, 'index', size)
        nodes = list(index.iter_all_entries())
        self.assertEqual(sorted(nodes), nodes)
        self.assertEqual(16, len(nodes))

    def test_spill_index_stress_1_1_no_combine(self):
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
        builder.set_optimize(for_size=False, combine_backing_indices=False)
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
        builder.add_node(*nodes[0])
        # Test the parts of the index that take up memory are doing so
        # predictably.
        self.assertEqual(1, len(builder._nodes))
        self.assertIs(None, builder._nodes_by_key)
        builder.add_node(*nodes[1])
        self.assertEqual(0, len(builder._nodes))
        self.assertIs(None, builder._nodes_by_key)
        self.assertEqual(1, len(builder._backing_indices))
        self.assertEqual(2, builder._backing_indices[0].key_count())
        # now back to memory
        builder.add_node(*nodes[2])
        self.assertEqual(1, len(builder._nodes))
        self.assertIs(None, builder._nodes_by_key)
        # And spills to a second backing index but doesn't combine
        builder.add_node(*nodes[3])
        self.assertEqual(0, len(builder._nodes))
        self.assertIs(None, builder._nodes_by_key)
        self.assertEqual(2, len(builder._backing_indices))
        for backing_index in builder._backing_indices:
            self.assertEqual(2, backing_index.key_count())
        # The next spills to the 3rd slot
        builder.add_node(*nodes[4])
        builder.add_node(*nodes[5])
        self.assertEqual(0, len(builder._nodes))
        self.assertIs(None, builder._nodes_by_key)
        self.assertEqual(3, len(builder._backing_indices))
        for backing_index in builder._backing_indices:
            self.assertEqual(2, backing_index.key_count())
        # Now spill a few more, and check that we don't combine
        builder.add_node(*nodes[6])
        builder.add_node(*nodes[7])
        builder.add_node(*nodes[8])
        builder.add_node(*nodes[9])
        builder.add_node(*nodes[10])
        builder.add_node(*nodes[11])
        builder.add_node(*nodes[12])
        self.assertEqual(6, len(builder._backing_indices))
        for backing_index in builder._backing_indices:
            self.assertEqual(2, backing_index.key_count())
        # Test that memory and disk are both used for query methods; and that
        # None is skipped over happily.
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
                         list(builder.iter_all_entries()))
        # Two nodes - one memory one disk
        self.assertEqual({(builder,) + node for node in nodes[11:13]},
                         set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
        self.assertEqual(13, builder.key_count())
        self.assertEqual({(builder,) + node for node in nodes[11:13]},
                         set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
        builder.add_node(*nodes[13])
        builder.add_node(*nodes[14])
        builder.add_node(*nodes[15])
        self.assertEqual(8, len(builder._backing_indices))
        for backing_index in builder._backing_indices:
            self.assertEqual(2, backing_index.key_count())
        # Now finish, and check we got a correctly ordered tree
        # (named ``t`` so it does not shadow the transport module imported
        # at the top of the file)
        t = self.get_transport('')
        size = t.put_file('index', builder.finish())
        index = btree_index.BTreeGraphIndex(t, 'index', size)
        nodes = list(index.iter_all_entries())
        self.assertEqual(sorted(nodes), nodes)
        self.assertEqual(16, len(nodes))

    def test_set_optimize(self):
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
        builder.set_optimize(for_size=True)
        self.assertTrue(builder._optimize_for_size)
        builder.set_optimize(for_size=False)
        self.assertFalse(builder._optimize_for_size)
        # test that we can set combine_backing_indices without effecting
        # _optimize_for_size
        obj = object()
        builder._optimize_for_size = obj
        builder.set_optimize(combine_backing_indices=False)
        self.assertFalse(builder._combine_backing_indices)
        self.assertIs(obj, builder._optimize_for_size)
        builder.set_optimize(combine_backing_indices=True)
        self.assertTrue(builder._combine_backing_indices)
        self.assertIs(obj, builder._optimize_for_size)

    def test_spill_index_stress_2_2(self):
        # test that references and longer keys don't confuse things.
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
                                           spill_at=2)
        nodes = self.make_nodes(16, 2, 2)
        builder.add_node(*nodes[0])
        # Test the parts of the index that take up memory are doing so
        # predictably.
        self.assertEqual(1, len(builder._nodes))
        self.assertIs(None, builder._nodes_by_key)
        builder.add_node(*nodes[1])
        self.assertEqual(0, len(builder._nodes))
        self.assertIs(None, builder._nodes_by_key)
        self.assertEqual(1, len(builder._backing_indices))
        self.assertEqual(2, builder._backing_indices[0].key_count())
        # now back to memory
        # Build up the nodes by key dict
        old = dict(builder._get_nodes_by_key())
        builder.add_node(*nodes[2])
        self.assertEqual(1, len(builder._nodes))
        self.assertIsNot(None, builder._nodes_by_key)
        self.assertNotEqual({}, builder._nodes_by_key)
        # We should have a new entry
        self.assertNotEqual(old, builder._nodes_by_key)
        # And spills to a second backing index combing all
        builder.add_node(*nodes[3])
        self.assertEqual(0, len(builder._nodes))
        self.assertIs(None, builder._nodes_by_key)
        self.assertEqual(2, len(builder._backing_indices))
        self.assertEqual(None, builder._backing_indices[0])
        self.assertEqual(4, builder._backing_indices[1].key_count())
        # The next spills to the 2-len slot
        builder.add_node(*nodes[4])
        builder.add_node(*nodes[5])
        self.assertEqual(0, len(builder._nodes))
        self.assertIs(None, builder._nodes_by_key)
        self.assertEqual(2, len(builder._backing_indices))
        self.assertEqual(2, builder._backing_indices[0].key_count())
        self.assertEqual(4, builder._backing_indices[1].key_count())
        # Next spill combines
        builder.add_node(*nodes[6])
        builder.add_node(*nodes[7])
        self.assertEqual(3, len(builder._backing_indices))
        self.assertEqual(None, builder._backing_indices[0])
        self.assertEqual(None, builder._backing_indices[1])
        self.assertEqual(8, builder._backing_indices[2].key_count())
        # And so forth - counting up in binary.
        builder.add_node(*nodes[8])
        builder.add_node(*nodes[9])
        self.assertEqual(3, len(builder._backing_indices))
        self.assertEqual(2, builder._backing_indices[0].key_count())
        self.assertEqual(None, builder._backing_indices[1])
        self.assertEqual(8, builder._backing_indices[2].key_count())
        builder.add_node(*nodes[10])
        builder.add_node(*nodes[11])
        self.assertEqual(3, len(builder._backing_indices))
        self.assertEqual(None, builder._backing_indices[0])
        self.assertEqual(4, builder._backing_indices[1].key_count())
        self.assertEqual(8, builder._backing_indices[2].key_count())
        builder.add_node(*nodes[12])
        # Test that memory and disk are both used for query methods; and that
        # None is skipped over happily.
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
                         list(builder.iter_all_entries()))
        # Two nodes - one memory one disk
        self.assertEqual({(builder,) + node for node in nodes[11:13]},
                         set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
        self.assertEqual(13, builder.key_count())
        self.assertEqual({(builder,) + node for node in nodes[11:13]},
                         set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
        builder.add_node(*nodes[13])
        self.assertEqual(3, len(builder._backing_indices))
        self.assertEqual(2, builder._backing_indices[0].key_count())
        self.assertEqual(4, builder._backing_indices[1].key_count())
        self.assertEqual(8, builder._backing_indices[2].key_count())
        builder.add_node(*nodes[14])
        builder.add_node(*nodes[15])
        self.assertEqual(4, len(builder._backing_indices))
        self.assertEqual(None, builder._backing_indices[0])
        self.assertEqual(None, builder._backing_indices[1])
        self.assertEqual(None, builder._backing_indices[2])
        self.assertEqual(16, builder._backing_indices[3].key_count())
        # Now finish, and check we got a correctly ordered tree
        # (named ``t`` so it does not shadow the transport module imported
        # at the top of the file)
        t = self.get_transport('')
        size = t.put_file('index', builder.finish())
        index = btree_index.BTreeGraphIndex(t, 'index', size)
        nodes = list(index.iter_all_entries())
        self.assertEqual(sorted(nodes), nodes)
        self.assertEqual(16, len(nodes))

    def test_spill_index_duplicate_key_caught_on_finish(self):
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
        builder.add_node(*nodes[0])
        builder.add_node(*nodes[1])
        builder.add_node(*nodes[0])
        self.assertRaises(_mod_index.BadIndexDuplicateKey, builder.finish)
619
620
621class TestBTreeIndex(BTreeTestCase):
622
623    def make_index(self, ref_lists=0, key_elements=1, nodes=[]):
624        builder = btree_index.BTreeBuilder(reference_lists=ref_lists,
625                                           key_elements=key_elements)
626        for key, value, references in nodes:
627            builder.add_node(key, value, references)
628        stream = builder.finish()
629        trans = transport.get_transport_from_url('trace+' + self.get_url())
630        size = trans.put_file('index', stream)
631        return btree_index.BTreeGraphIndex(trans, 'index', size)
632
633    def make_index_with_offset(self, ref_lists=1, key_elements=1, nodes=[],
634                               offset=0):
635        builder = btree_index.BTreeBuilder(key_elements=key_elements,
636                                           reference_lists=ref_lists)
637        builder.add_nodes(nodes)
638        transport = self.get_transport('')
639        # NamedTemporaryFile dies on builder.finish().read(). weird.
640        temp_file = builder.finish()
641        content = temp_file.read()
642        del temp_file
643        size = len(content)
644        transport.put_bytes('index', (b' ' * offset) + content)
645        return btree_index.BTreeGraphIndex(transport, 'index', size=size,
646                                           offset=offset)
647
    def test_clear_cache(self):
        """clear_cache() empties the leaf cache but keeps root/internal nodes."""
        nodes = self.make_nodes(160, 2, 2)
        index = self.make_index(ref_lists=2, key_elements=2, nodes=nodes)
        # Prime the caches with a single lookup.
        self.assertEqual(1, len(list(index.iter_entries([nodes[30][0]]))))
        self.assertEqual([1, 4], index._row_lengths)
        self.assertIsNot(None, index._root_node)
        internal_node_pre_clear = set(index._internal_node_cache)
        self.assertTrue(len(index._leaf_node_cache) > 0)
        index.clear_cache()
        # We don't touch _root_node or _internal_node_cache, both should be
        # small, and can save a round trip or two
        self.assertIsNot(None, index._root_node)
        # NOTE: We don't want to affect the _internal_node_cache, as we expect
        #       it will be small, and if we ever do touch this index again, it
        #       will save round-trips.  This assertion isn't very strong,
        #       because without a 3-level index, we don't have any internal
        #       nodes cached.
        self.assertEqual(internal_node_pre_clear,
                         set(index._internal_node_cache))
        self.assertEqual(0, len(index._leaf_node_cache))
668
669    def test_trivial_constructor(self):
670        t = transport.get_transport_from_url('trace+' + self.get_url(''))
671        index = btree_index.BTreeGraphIndex(t, 'index', None)
672        # Checks the page size at load, but that isn't logged yet.
673        self.assertEqual([], t._activity)
674
675    def test_with_size_constructor(self):
676        t = transport.get_transport_from_url('trace+' + self.get_url(''))
677        index = btree_index.BTreeGraphIndex(t, 'index', 1)
678        # Checks the page size at load, but that isn't logged yet.
679        self.assertEqual([], t._activity)
680
    def test_empty_key_count_no_size(self):
        """key_count() on an empty index with unknown size does a full 'get'."""
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
        t.put_file('index', builder.finish())
        index = btree_index.BTreeGraphIndex(t, 'index', None)
        # Reset the activity log recorded while writing the index.
        del t._activity[:]
        self.assertEqual([], t._activity)
        self.assertEqual(0, index.key_count())
        # The entire index should have been requested (as we generally have the
        # size available, and doing many small readvs is inappropriate).
        # We can't tell how much was actually read here, but - check the code.
        self.assertEqual([('get', 'index')], t._activity)
693
    def test_empty_key_count(self):
        """key_count() on an empty index with known size issues one readv."""
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
        size = t.put_file('index', builder.finish())
        # An empty index is just the header; its size is fixed.
        self.assertEqual(72, size)
        index = btree_index.BTreeGraphIndex(t, 'index', size)
        del t._activity[:]
        self.assertEqual([], t._activity)
        self.assertEqual(0, index.key_count())
        # The entire index should have been read, as 4K > size
        self.assertEqual([('readv', 'index', [(0, 72)], False, None)],
                         t._activity)
706
    def test_non_empty_key_count_2_2(self):
        """key_count() for a one-page index reads the single page once."""
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
        nodes = self.make_nodes(35, 2, 2)
        for node in nodes:
            builder.add_node(*node)
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
        size = t.put_file('index', builder.finish())
        index = btree_index.BTreeGraphIndex(t, 'index', size)
        del t._activity[:]
        self.assertEqual([], t._activity)
        # make_nodes(35, 2, ...) yields 70 keys.
        self.assertEqual(70, index.key_count())
        # The entire index should have been read, as it is one page long.
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
                         t._activity)
        self.assertEqualApproxCompressed(1173, size)
722
723    def test_with_offset_no_size(self):
724        index = self.make_index_with_offset(key_elements=1, ref_lists=1,
725                                            offset=1234,
726                                            nodes=self.make_nodes(200, 1, 1))
727        index._size = None  # throw away the size info
728        self.assertEqual(200, index.key_count())
729
730    def test_with_small_offset(self):
731        index = self.make_index_with_offset(key_elements=1, ref_lists=1,
732                                            offset=1234,
733                                            nodes=self.make_nodes(200, 1, 1))
734        self.assertEqual(200, index.key_count())
735
736    def test_with_large_offset(self):
737        index = self.make_index_with_offset(key_elements=1, ref_lists=1,
738                                            offset=123456,
739                                            nodes=self.make_nodes(200, 1, 1))
740        self.assertEqual(200, index.key_count())
741
    def test__read_nodes_no_size_one_page_reads_once(self):
        """With no size, reading a one-page index does a single 'get'."""
        self.make_index(nodes=[((b'key',), b'value', ())])
        trans = transport.get_transport_from_url('trace+' + self.get_url())
        index = btree_index.BTreeGraphIndex(trans, 'index', None)
        del trans._activity[:]
        nodes = dict(index._read_nodes([0]))
        self.assertEqual({0}, set(nodes))
        node = nodes[0]
        self.assertEqual([(b'key',)], node.all_keys())
        # One whole-file fetch, not a readv, since the size is unknown.
        self.assertEqual([('get', 'index')], trans._activity)
752
    def test__read_nodes_no_size_multiple_pages(self):
        """With no size, one _read_nodes call fetches and parses every page."""
        index = self.make_index(2, 2, nodes=self.make_nodes(160, 2, 2))
        index.key_count()
        num_pages = index._row_offsets[-1]
        # Reopen with a traced transport and no size
        trans = transport.get_transport_from_url('trace+' + self.get_url())
        index = btree_index.BTreeGraphIndex(trans, 'index', None)
        del trans._activity[:]
        # Requesting only page 0 still yields all pages: the whole file had
        # to be downloaded anyway.
        nodes = dict(index._read_nodes([0]))
        self.assertEqual(list(range(num_pages)), sorted(nodes))
763
    def test_2_levels_key_count_2_2(self):
        """key_count() on a two-level index reads only the root page."""
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
        nodes = self.make_nodes(160, 2, 2)
        for node in nodes:
            builder.add_node(*node)
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
        size = t.put_file('index', builder.finish())
        self.assertEqualApproxCompressed(17692, size)
        index = btree_index.BTreeGraphIndex(t, 'index', size)
        del t._activity[:]
        self.assertEqual([], t._activity)
        # make_nodes(160, 2, ...) yields 320 keys.
        self.assertEqual(320, index.key_count())
        # The entire index should not have been read.
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None)],
                         t._activity)
779
    def test_validate_one_page(self):
        """validate() on a single-page index reads that page exactly once."""
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
        nodes = self.make_nodes(45, 2, 2)
        for node in nodes:
            builder.add_node(*node)
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
        size = t.put_file('index', builder.finish())
        index = btree_index.BTreeGraphIndex(t, 'index', size)
        del t._activity[:]
        self.assertEqual([], t._activity)
        index.validate()
        # The entire index should have been read linearly.
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
                         t._activity)
        self.assertEqualApproxCompressed(1488, size)
795
    def test_validate_two_pages(self):
        """validate() on a multi-page index reads root first, then leaves."""
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
        nodes = self.make_nodes(80, 2, 2)
        for node in nodes:
            builder.add_node(*node)
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
        size = t.put_file('index', builder.finish())
        # Root page, 2 leaf pages
        self.assertEqualApproxCompressed(9339, size)
        index = btree_index.BTreeGraphIndex(t, 'index', size)
        del t._activity[:]
        self.assertEqual([], t._activity)
        index.validate()
        rem = size - 8192  # Number of remaining bytes after second block
        # The entire index should have been read linearly.
        self.assertEqual(
            [('readv', 'index', [(0, 4096)], False, None),
             ('readv', 'index', [(4096, 4096), (8192, rem)], False, None)],
            t._activity)
        # XXX: TODO: write some badly-ordered nodes, and some pointers-to-wrong
        # node and make validate find them.
817
    def test_eq_ne(self):
        """Index equality is determined by transport, name and size."""
        # two indices are equal when constructed with the same parameters:
        t1 = transport.get_transport_from_url('trace+' + self.get_url(''))
        t2 = self.get_transport()
        self.assertTrue(
            btree_index.BTreeGraphIndex(t1, 'index', None)
            == btree_index.BTreeGraphIndex(t1, 'index', None))
        self.assertTrue(
            btree_index.BTreeGraphIndex(t1, 'index', 20)
            == btree_index.BTreeGraphIndex(t1, 'index', 20))
        # differing transport:
        self.assertFalse(
            btree_index.BTreeGraphIndex(t1, 'index', 20)
            == btree_index.BTreeGraphIndex(t2, 'index', 20))
        # differing name:
        self.assertFalse(
            btree_index.BTreeGraphIndex(t1, 'inde1', 20)
            == btree_index.BTreeGraphIndex(t1, 'inde2', 20))
        # differing size:
        self.assertFalse(
            btree_index.BTreeGraphIndex(t1, 'index', 10)
            == btree_index.BTreeGraphIndex(t1, 'index', 20))
        # __ne__ must mirror __eq__ in each case:
        self.assertFalse(
            btree_index.BTreeGraphIndex(t1, 'index', None)
            != btree_index.BTreeGraphIndex(t1, 'index', None))
        self.assertFalse(
            btree_index.BTreeGraphIndex(t1, 'index', 20)
            != btree_index.BTreeGraphIndex(t1, 'index', 20))
        self.assertTrue(
            btree_index.BTreeGraphIndex(t1, 'index', 20)
            != btree_index.BTreeGraphIndex(t2, 'index', 20))
        self.assertTrue(
            btree_index.BTreeGraphIndex(t1, 'inde1', 20)
            != btree_index.BTreeGraphIndex(t1, 'inde2', 20))
        self.assertTrue(
            btree_index.BTreeGraphIndex(t1, 'index', 10)
            != btree_index.BTreeGraphIndex(t1, 'index', 20))
852
853    def test_key_too_big(self):
854        # the size that matters here is the _compressed_ size of the key, so we can't
855        # do a simple character repeat.
856        bigKey = b''.join(b'%d' % n for n in range(btree_index._PAGE_SIZE))
857        self.assertRaises(_mod_index.BadIndexKey,
858                          self.make_index,
859                          nodes=[((bigKey,), b'value', ())])
860
    def test_iter_all_only_root_no_size(self):
        """Iterating a root-only index with unknown size does one 'get'."""
        self.make_index(nodes=[((b'key',), b'value', ())])
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
        index = btree_index.BTreeGraphIndex(t, 'index', None)
        del t._activity[:]
        self.assertEqual([((b'key',), b'value')],
                         [x[1:] for x in index.iter_all_entries()])
        self.assertEqual([('get', 'index')], t._activity)
869
    def test_iter_all_entries_reads(self):
        """iter_all_entries() reads the root, then leaf pages linearly."""
        # iterating all entries reads the header, then does a linear
        # read.
        self.shrink_page_size()
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
        # 20k nodes is enough to create two internal nodes on the second
        # level, with a 2K page size
        nodes = self.make_nodes(10000, 2, 2)
        for node in nodes:
            builder.add_node(*node)
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
        size = t.put_file('index', builder.finish())
        page_size = btree_index._PAGE_SIZE
        del builder
        index = btree_index.BTreeGraphIndex(t, 'index', size)
        del t._activity[:]
        self.assertEqual([], t._activity)
        found_nodes = self.time(list, index.iter_all_entries())
        bare_nodes = []
        for node in found_nodes:
            self.assertTrue(node[0] is index)
            bare_nodes.append(node[1:])
        self.assertEqual(3, len(index._row_lengths),
                         "Not enough rows: %r" % index._row_lengths)
        # Should be as long as the nodes we supplied
        self.assertEqual(20000, len(found_nodes))
        # Should have the same content
        self.assertEqual(set(nodes), set(bare_nodes))
        # Should have done linear scan IO up the index, ignoring
        # the internal nodes:
        # The entire index should have been read
        total_pages = sum(index._row_lengths)
        self.assertEqual(total_pages, index._row_offsets[-1])
        self.assertEqualApproxCompressed(1303220, size)
        # The start of the leaves
        first_byte = index._row_offsets[-2] * page_size
        readv_request = []
        for offset in range(first_byte, size, page_size):
            readv_request.append((offset, page_size))
        # The last page is truncated
        readv_request[-1] = (readv_request[-1][0], size % page_size)
        expected = [('readv', 'index', [(0, page_size)], False, None),
                    ('readv', 'index', readv_request, False, None)]
        if expected != t._activity:
            # Use a diff for a readable failure message on these long lists.
            self.assertEqualDiff(pprint.pformat(expected),
                                 pprint.pformat(t._activity))
916
    def test_iter_entries_references_2_refs_resolved(self):
        """Looking up one key reads only the root page and one leaf page."""
        # iterating some entries reads just the pages needed. For now, to
        # get it working and start measuring, only 4K pages are read.
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
        # 160 nodes is enough to create a two-level index.
        nodes = self.make_nodes(160, 2, 2)
        for node in nodes:
            builder.add_node(*node)
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
        size = t.put_file('index', builder.finish())
        del builder
        index = btree_index.BTreeGraphIndex(t, 'index', size)
        del t._activity[:]
        self.assertEqual([], t._activity)
        # search for one key
        found_nodes = list(index.iter_entries([nodes[30][0]]))
        bare_nodes = []
        for node in found_nodes:
            self.assertTrue(node[0] is index)
            bare_nodes.append(node[1:])
        # Should be as long as the nodes we supplied
        self.assertEqual(1, len(found_nodes))
        # Should have the same content
        self.assertEqual(nodes[30], bare_nodes[0])
        # Should have read the root node, then one leaf page:
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
                          ('readv', 'index', [(8192, 4096), ], False, None)],
                         t._activity)
945
946    def test_iter_key_prefix_1_element_key_None(self):
947        index = self.make_index()
948        self.assertRaises(_mod_index.BadIndexKey, list,
949                          index.iter_entries_prefix([(None, )]))
950
    def test_iter_key_prefix_wrong_length(self):
        """Prefix queries with the wrong key width raise BadIndexKey."""
        # Too long for a 1-element index:
        index = self.make_index()
        self.assertRaises(_mod_index.BadIndexKey, list,
                          index.iter_entries_prefix([(b'foo', None)]))
        # Too short and too long for a 2-element index:
        index = self.make_index(key_elements=2)
        self.assertRaises(_mod_index.BadIndexKey, list,
                          index.iter_entries_prefix([(b'foo', )]))
        self.assertRaises(_mod_index.BadIndexKey, list,
                          index.iter_entries_prefix([(b'foo', None, None)]))
960
    def test_iter_key_prefix_1_key_element_no_refs(self):
        """Exact single-element prefixes return the matching entries."""
        index = self.make_index(nodes=[
            ((b'name', ), b'data', ()),
            ((b'ref', ), b'refdata', ())])
        self.assertEqual({(index, (b'name', ), b'data'),
                          (index, (b'ref', ), b'refdata')},
                         set(index.iter_entries_prefix([(b'name', ), (b'ref', )])))
968
    def test_iter_key_prefix_1_key_element_refs(self):
        """Prefix results include each entry's resolved reference lists."""
        index = self.make_index(1, nodes=[
            ((b'name', ), b'data', ([(b'ref', )], )),
            ((b'ref', ), b'refdata', ([], ))])
        self.assertEqual({(index, (b'name', ), b'data', (((b'ref',),),)),
                          (index, (b'ref', ), b'refdata', ((), ))},
                         set(index.iter_entries_prefix([(b'name', ), (b'ref', )])))
976
    def test_iter_key_prefix_2_key_element_no_refs(self):
        """2-element prefixes match exactly, or by wildcard with None."""
        index = self.make_index(key_elements=2, nodes=[
            ((b'name', b'fin1'), b'data', ()),
            ((b'name', b'fin2'), b'beta', ()),
            ((b'ref', b'erence'), b'refdata', ())])
        # Full-length prefixes behave like exact lookups:
        self.assertEqual({(index, (b'name', b'fin1'), b'data'),
                          (index, (b'ref', b'erence'), b'refdata')},
                         set(index.iter_entries_prefix(
                             [(b'name', b'fin1'), (b'ref', b'erence')])))
        # A trailing None acts as a wildcard on that element:
        self.assertEqual({(index, (b'name', b'fin1'), b'data'),
                          (index, (b'name', b'fin2'), b'beta')},
                         set(index.iter_entries_prefix([(b'name', None)])))
989
    def test_iter_key_prefix_2_key_element_refs(self):
        """2-element prefix matching also returns resolved reference lists."""
        index = self.make_index(1, key_elements=2, nodes=[
            ((b'name', b'fin1'), b'data', ([(b'ref', b'erence')], )),
            ((b'name', b'fin2'), b'beta', ([], )),
            ((b'ref', b'erence'), b'refdata', ([], ))])
        # Full-length prefixes:
        self.assertEqual(
            {(index, (b'name', b'fin1'), b'data', (((b'ref', b'erence'),),)),
                (index, (b'ref', b'erence'), b'refdata', ((), ))},
            set(index.iter_entries_prefix(
                [(b'name', b'fin1'), (b'ref', b'erence')])))
        # Wildcard on the second element:
        self.assertEqual(
            {(index, (b'name', b'fin1'), b'data', (((b'ref', b'erence'),),)),
                (index, (b'name', b'fin2'), b'beta', ((), ))},
            set(index.iter_entries_prefix([(b'name', None)])))
1004
    # XXX: external_references tests are duplicated in test_index.  We
    # probably should have per_graph_index tests...
    def test_external_references_no_refs(self):
        """external_references() is invalid on an index with no ref lists."""
        index = self.make_index(ref_lists=0, nodes=[])
        self.assertRaises(ValueError, index.external_references, 0)
1010
1011    def test_external_references_no_results(self):
1012        index = self.make_index(ref_lists=1, nodes=[
1013            ((b'key',), b'value', ([],))])
1014        self.assertEqual(set(), index.external_references(0))
1015
1016    def test_external_references_missing_ref(self):
1017        missing_key = (b'missing',)
1018        index = self.make_index(ref_lists=1, nodes=[
1019            ((b'key',), b'value', ([missing_key],))])
1020        self.assertEqual({missing_key}, index.external_references(0))
1021
1022    def test_external_references_multiple_ref_lists(self):
1023        missing_key = (b'missing',)
1024        index = self.make_index(ref_lists=2, nodes=[
1025            ((b'key',), b'value', ([], [missing_key]))])
1026        self.assertEqual(set([]), index.external_references(0))
1027        self.assertEqual({missing_key}, index.external_references(1))
1028
1029    def test_external_references_two_records(self):
1030        index = self.make_index(ref_lists=1, nodes=[
1031            ((b'key-1',), b'value', ([(b'key-2',)],)),
1032            ((b'key-2',), b'value', ([],)),
1033            ])
1034        self.assertEqual(set([]), index.external_references(0))
1035
    def test__find_ancestors_one_page(self):
        """Ancestry fully resolvable from one page leaves nothing to search."""
        key1 = (b'key-1',)
        key2 = (b'key-2',)
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
            (key1, b'value', ([key2],)),
            (key2, b'value', ([],)),
            ])
        parent_map = {}
        missing_keys = set()
        search_keys = index._find_ancestors(
            [key1], 0, parent_map, missing_keys)
        self.assertEqual({key1: (key2,), key2: ()}, parent_map)
        self.assertEqual(set(), missing_keys)
        self.assertEqual(set(), search_keys)
1050
    def test__find_ancestors_one_page_w_missing(self):
        """A requested key absent from its page is reported missing at once."""
        key1 = (b'key-1',)
        key2 = (b'key-2',)
        key3 = (b'key-3',)
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
            (key1, b'value', ([key2],)),
            (key2, b'value', ([],)),
            ])
        parent_map = {}
        missing_keys = set()
        search_keys = index._find_ancestors([key2, key3], 0, parent_map,
                                            missing_keys)
        self.assertEqual({key2: ()}, parent_map)
        # we know that key3 is missing because we read the page that it would
        # otherwise be on
        self.assertEqual({key3}, missing_keys)
        self.assertEqual(set(), search_keys)
1068
1069    def test__find_ancestors_one_parent_missing(self):
1070        key1 = (b'key-1',)
1071        key2 = (b'key-2',)
1072        key3 = (b'key-3',)
1073        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1074            (key1, b'value', ([key2],)),
1075            (key2, b'value', ([key3],)),
1076            ])
1077        parent_map = {}
1078        missing_keys = set()
1079        search_keys = index._find_ancestors([key1], 0, parent_map,
1080                                            missing_keys)
1081        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1082        self.assertEqual(set(), missing_keys)
1083        # all we know is that key3 wasn't present on the page we were reading
1084        # but if you look, the last key is key2 which comes before key3, so we
1085        # don't know whether key3 would land on this page or not.
1086        self.assertEqual({key3}, search_keys)
1087        search_keys = index._find_ancestors(search_keys, 0, parent_map,
1088                                            missing_keys)
1089        # passing it back in, we are sure it is 'missing'
1090        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1091        self.assertEqual({key3}, missing_keys)
1092        self.assertEqual(set([]), search_keys)
1093
    def test__find_ancestors_dont_search_known(self):
        """Keys already present in parent_map are not searched again."""
        key1 = (b'key-1',)
        key2 = (b'key-2',)
        key3 = (b'key-3',)
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
            (key1, b'value', ([key2],)),
            (key2, b'value', ([key3],)),
            (key3, b'value', ([],)),
            ])
        # We already know about key2, so we won't try to search for key3
        parent_map = {key2: (key3,)}
        missing_keys = set()
        search_keys = index._find_ancestors([key1], 0, parent_map,
                                            missing_keys)
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
        self.assertEqual(set(), missing_keys)
        self.assertEqual(set(), search_keys)
1111
    def test__find_ancestors_multiple_pages(self):
        """Ancestry search across a page boundary returns a continuation key."""
        # We need to use enough keys that we actually cause a split
        start_time = 1249671539
        email = "joebob@example.com"
        nodes = []
        ref_lists = ((),)
        rev_keys = []
        # Build a linear ancestry: each revision's single parent is the
        # previous revision.
        for i in range(400):
            rev_id = ('%s-%s-%s' % (email,
                                    osutils.compact_date(start_time + i),
                                    osutils.rand_chars(16))).encode('ascii')
            rev_key = (rev_id,)
            nodes.append((rev_key, b'value', ref_lists))
            # We have a ref 'list' of length 1, with a list of parents, with 1
            # parent which is a key
            ref_lists = ((rev_key,),)
            rev_keys.append(rev_key)
        index = self.make_index(ref_lists=1, key_elements=1, nodes=nodes)
        self.assertEqual(400, index.key_count())
        # Two levels: root plus a row of leaf pages.
        self.assertEqual(3, len(index._row_offsets))
        nodes = dict(index._read_nodes([1, 2]))
        l1 = nodes[1]
        l2 = nodes[2]
        min_l2_key = l2.min_key
        max_l1_key = l1.max_key
        self.assertTrue(max_l1_key < min_l2_key)
        parents_min_l2_key = l2[min_l2_key][1][0]
        self.assertEqual((l1.max_key,), parents_min_l2_key)
        # Now, whatever key we select that would fall on the second page,
        # should give us all the parents until the page break
        key_idx = rev_keys.index(min_l2_key)
        next_key = rev_keys[key_idx + 1]
        # So now when we get the parent map, we should get the key we are
        # looking for, min_l2_key, and then a reference to go look for the
        # parent of that key
        parent_map = {}
        missing_keys = set()
        search_keys = index._find_ancestors([next_key], 0, parent_map,
                                            missing_keys)
        self.assertEqual([min_l2_key, next_key], sorted(parent_map))
        self.assertEqual(set(), missing_keys)
        self.assertEqual({max_l1_key}, search_keys)
        parent_map = {}
        search_keys = index._find_ancestors([max_l1_key], 0, parent_map,
                                            missing_keys)
        # Searching from the first page resolves everything on that page.
        self.assertEqual(l1.all_keys(), sorted(parent_map))
        self.assertEqual(set(), missing_keys)
        self.assertEqual(set(), search_keys)
1160
1161    def test__find_ancestors_empty_index(self):
1162        index = self.make_index(ref_lists=1, key_elements=1, nodes=[])
1163        parent_map = {}
1164        missing_keys = set()
1165        search_keys = index._find_ancestors([('one',), ('two',)], 0, parent_map,
1166                                            missing_keys)
1167        self.assertEqual(set(), search_keys)
1168        self.assertEqual({}, parent_map)
1169        self.assertEqual({('one',), ('two',)}, missing_keys)
1170
    def test_supports_unlimited_cache(self):
        """unlimited_cache=True swaps the bounded node caches for plain dicts."""
        builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
        # We need enough nodes to cause a page split (so we have both an
        # internal node and a couple leaf nodes. 500 seems to be enough.)
        nodes = self.make_nodes(500, 1, 0)
        for node in nodes:
            builder.add_node(*node)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file('index', stream)
        index = btree_index.BTreeGraphIndex(trans, 'index', size)
        self.assertEqual(500, index.key_count())
        # We have an internal node
        self.assertEqual(2, len(index._row_lengths))
        # We have at least 2 leaf nodes
        self.assertTrue(index._row_lengths[-1] >= 2)
        # Default: bounded LRU/FIFO caches.
        self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
        self.assertEqual(btree_index._NODE_CACHE_SIZE,
                         index._leaf_node_cache._max_cache)
        self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
        self.assertEqual(100, index._internal_node_cache._max_cache)
        # No change if unlimited_cache=False is passed
        index = btree_index.BTreeGraphIndex(trans, 'index', size,
                                            unlimited_cache=False)
        self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
        self.assertEqual(btree_index._NODE_CACHE_SIZE,
                         index._leaf_node_cache._max_cache)
        self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
        self.assertEqual(100, index._internal_node_cache._max_cache)
        # unlimited_cache=True: plain dicts with no eviction.
        index = btree_index.BTreeGraphIndex(trans, 'index', size,
                                            unlimited_cache=True)
        self.assertIsInstance(index._leaf_node_cache, dict)
        self.assertIs(type(index._internal_node_cache), dict)
        # Exercise the lookup code
        entries = set(index.iter_entries([n[0] for n in nodes]))
        self.assertEqual(500, len(entries))
1207
1208
1209class TestBTreeNodes(BTreeTestCase):
1210
1211    scenarios = btreeparser_scenarios()
1212
    def setUp(self):
        super(TestBTreeNodes, self).setUp()
        # Point the module at the serializer implementation (python or C)
        # selected by the current scenario.
        self.overrideAttr(btree_index, '_btree_serializer', self.parse_btree)
1216
    def test_LeafNode_1_0(self):
        """Parse a leaf node with 1-element keys and no reference lists."""
        node_bytes = (b"type=leaf\n"
                      b"0000000000000000000000000000000000000000\x00\x00value:0\n"
                      b"1111111111111111111111111111111111111111\x00\x00value:1\n"
                      b"2222222222222222222222222222222222222222\x00\x00value:2\n"
                      b"3333333333333333333333333333333333333333\x00\x00value:3\n"
                      b"4444444444444444444444444444444444444444\x00\x00value:4\n")
        node = btree_index._LeafNode(node_bytes, 1, 0)
        # We do direct access, or don't care about order, to leaf nodes most of
        # the time, so a dict is useful:
        self.assertEqual({
            (b"0000000000000000000000000000000000000000",): (b"value:0", ()),
            (b"1111111111111111111111111111111111111111",): (b"value:1", ()),
            (b"2222222222222222222222222222222222222222",): (b"value:2", ()),
            (b"3333333333333333333333333333333333333333",): (b"value:3", ()),
            (b"4444444444444444444444444444444444444444",): (b"value:4", ()),
            }, dict(node.all_items()))
1234
1235    def test_LeafNode_2_2(self):
1236        node_bytes = (b"type=leaf\n"
1237                      b"00\x0000\x00\t00\x00ref00\x00value:0\n"
1238                      b"00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
1239                      b"11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
1240                      b"11\x0044\x00\t11\x00ref00\x00value:4\n"
1241                      b""
1242                      )
1243        node = btree_index._LeafNode(node_bytes, 2, 2)
1244        # We do direct access, or don't care about order, to leaf nodes most of
1245        # the time, so a dict is useful:
1246        self.assertEqual({
1247            (b'00', b'00'): (b'value:0', ((), ((b'00', b'ref00'),))),
1248            (b'00', b'11'): (b'value:1', (((b'00', b'ref00'),),
1249                                          ((b'00', b'ref00'), (b'01', b'ref01')))),
1250            (b'11', b'33'): (b'value:3', (((b'11', b'ref22'),),
1251                                          ((b'11', b'ref22'), (b'11', b'ref22')))),
1252            (b'11', b'44'): (b'value:4', ((), ((b'11', b'ref00'),)))
1253            }, dict(node.all_items()))
1254
1255    def test_InternalNode_1(self):
1256        node_bytes = (b"type=internal\n"
1257                      b"offset=1\n"
1258                      b"0000000000000000000000000000000000000000\n"
1259                      b"1111111111111111111111111111111111111111\n"
1260                      b"2222222222222222222222222222222222222222\n"
1261                      b"3333333333333333333333333333333333333333\n"
1262                      b"4444444444444444444444444444444444444444\n"
1263                      )
1264        node = btree_index._InternalNode(node_bytes)
1265        # We want to bisect to find the right children from this node, so a
1266        # vector is most useful.
1267        self.assertEqual([
1268            (b"0000000000000000000000000000000000000000",),
1269            (b"1111111111111111111111111111111111111111",),
1270            (b"2222222222222222222222222222222222222222",),
1271            (b"3333333333333333333333333333333333333333",),
1272            (b"4444444444444444444444444444444444444444",),
1273            ], node.keys)
1274        self.assertEqual(1, node.offset)
1275
1276    def test_LeafNode_2_2(self):
1277        node_bytes = (b"type=leaf\n"
1278                      b"00\x0000\x00\t00\x00ref00\x00value:0\n"
1279                      b"00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
1280                      b"11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
1281                      b"11\x0044\x00\t11\x00ref00\x00value:4\n"
1282                      b""
1283                      )
1284        node = btree_index._LeafNode(node_bytes, 2, 2)
1285        # We do direct access, or don't care about order, to leaf nodes most of
1286        # the time, so a dict is useful:
1287        self.assertEqual({
1288            (b'00', b'00'): (b'value:0', ((), ((b'00', b'ref00'),))),
1289            (b'00', b'11'): (b'value:1', (((b'00', b'ref00'),),
1290                                          ((b'00', b'ref00'), (b'01', b'ref01')))),
1291            (b'11', b'33'): (b'value:3', (((b'11', b'ref22'),),
1292                                          ((b'11', b'ref22'), (b'11', b'ref22')))),
1293            (b'11', b'44'): (b'value:4', ((), ((b'11', b'ref00'),)))
1294            }, dict(node.all_items()))
1295
1296    def assertFlattened(self, expected, key, value, refs):
1297        flat_key, flat_line = self.parse_btree._flatten_node(
1298            (None, key, value, refs), bool(refs))
1299        self.assertEqual(b'\x00'.join(key), flat_key)
1300        self.assertEqual(expected, flat_line)
1301
1302    def test__flatten_node(self):
1303        self.assertFlattened(b'key\0\0value\n', (b'key',), b'value', [])
1304        self.assertFlattened(b'key\0tuple\0\0value str\n',
1305                             (b'key', b'tuple'), b'value str', [])
1306        self.assertFlattened(b'key\0tuple\0triple\0\0value str\n',
1307                             (b'key', b'tuple', b'triple'), b'value str', [])
1308        self.assertFlattened(b'k\0t\0s\0ref\0value str\n',
1309                             (b'k', b't', b's'), b'value str', [[(b'ref',)]])
1310        self.assertFlattened(b'key\0tuple\0ref\0key\0value str\n',
1311                             (b'key', b'tuple'), b'value str', [[(b'ref', b'key')]])
1312        self.assertFlattened(b"00\x0000\x00\t00\x00ref00\x00value:0\n",
1313                             (b'00', b'00'), b'value:0', ((), ((b'00', b'ref00'),)))
1314        self.assertFlattened(
1315            b"00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n",
1316            (b'00', b'11'), b'value:1',
1317            (((b'00', b'ref00'),), ((b'00', b'ref00'), (b'01', b'ref01'))))
1318        self.assertFlattened(
1319            b"11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n",
1320            (b'11', b'33'), b'value:3',
1321            (((b'11', b'ref22'),), ((b'11', b'ref22'), (b'11', b'ref22'))))
1322        self.assertFlattened(
1323            b"11\x0044\x00\t11\x00ref00\x00value:4\n",
1324            (b'11', b'44'), b'value:4', ((), ((b'11', b'ref00'),)))
1325
1326
class TestCompiledBtree(tests.TestCase):
    """Smoke test reporting whether the compiled btree parser exists."""

    def test_exists(self):
        # Surfaces a 'feature missing' result so the user knows the optional
        # compiled extension is not available in this environment.
        self.requireFeature(compiled_btreeparser_feature)
1333
1334
class TestMultiBisectRight(tests.TestCase):
    """Tests for BTreeGraphIndex._multi_bisect_right."""

    def assertMultiBisectRight(self, offsets, search_keys, fixed_keys):
        """Bisecting search_keys into fixed_keys must yield offsets."""
        actual = btree_index.BTreeGraphIndex._multi_bisect_right(
            search_keys, fixed_keys)
        self.assertEqual(offsets, actual)

    def test_after(self):
        # Every search key sorts after all fixed keys.
        self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a'])
        self.assertMultiBisectRight(
            [(3, ['e', 'f', 'g'])], ['e', 'f', 'g'], ['a', 'b', 'c'])

    def test_before(self):
        # Every search key sorts before all fixed keys.
        self.assertMultiBisectRight([(0, ['a'])], ['a'], ['b'])
        self.assertMultiBisectRight(
            [(0, ['a', 'b', 'c', 'd'])], ['a', 'b', 'c', 'd'], ['e', 'f', 'g'])

    def test_exact(self):
        # Search keys that match a fixed key land just to its right.
        self.assertMultiBisectRight([(1, ['a'])], ['a'], ['a'])
        self.assertMultiBisectRight(
            [(1, ['a']), (2, ['b'])], ['a', 'b'], ['a', 'b'])
        self.assertMultiBisectRight(
            [(1, ['a']), (3, ['c'])], ['a', 'c'], ['a', 'b', 'c'])

    def test_inbetween(self):
        # Search keys falling between fixed keys group by their gap.
        self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a', 'c'])
        self.assertMultiBisectRight(
            [(1, ['b', 'c', 'd']), (2, ['f', 'g'])],
            ['b', 'c', 'd', 'f', 'g'], ['a', 'e', 'h'])

    def test_mixed(self):
        # A mixture of before/between/after positions.
        self.assertMultiBisectRight(
            [(0, ['a', 'b']), (2, ['d', 'e']), (4, ['g', 'h'])],
            ['a', 'b', 'd', 'e', 'g', 'h'],
            ['c', 'd', 'f', 'g'])
1369
1370
1371class TestExpandOffsets(tests.TestCase):
1372
1373    def make_index(self, size, recommended_pages=None):
1374        """Make an index with a generic size.
1375
1376        This doesn't actually create anything on disk, it just primes a
1377        BTreeGraphIndex with the recommended information.
1378        """
1379        index = btree_index.BTreeGraphIndex(
1380            transport.get_transport_from_url('memory:///'),
1381            'test-index', size=size)
1382        if recommended_pages is not None:
1383            index._recommended_pages = recommended_pages
1384        return index
1385
1386    def set_cached_offsets(self, index, cached_offsets):
1387        """Monkeypatch to give a canned answer for _get_offsets_for...()."""
1388        def _get_offsets_to_cached_pages():
1389            cached = set(cached_offsets)
1390            return cached
1391        index._get_offsets_to_cached_pages = _get_offsets_to_cached_pages
1392
    def prepare_index(self, index, node_ref_lists, key_length, key_count,
                      row_lengths, cached_offsets):
        """Setup the BTreeGraphIndex with some pre-canned information."""
        # These attributes are normally populated when the index header is
        # read; set them directly so no I/O is needed.
        index.node_ref_lists = node_ref_lists
        index._key_length = key_length
        index._key_count = key_count
        index._row_lengths = row_lengths
        # Derive per-row start offsets from the row lengths set just above.
        index._compute_row_offsets()
        # Give the index a parsed root node (see test_no_root_node for the
        # behaviour when this is absent).
        index._root_node = btree_index._InternalNode(b'internal\noffset=0\n')
        self.set_cached_offsets(index, cached_offsets)
1403
1404    def make_100_node_index(self):
1405        index = self.make_index(4096 * 100, 6)
1406        # Consider we've already made a single request at the middle
1407        self.prepare_index(index, node_ref_lists=0, key_length=1,
1408                           key_count=1000, row_lengths=[1, 99],
1409                           cached_offsets=[0, 50])
1410        return index
1411
1412    def make_1000_node_index(self):
1413        index = self.make_index(4096 * 1000, 6)
1414        # Pretend we've already made a single request in the middle
1415        self.prepare_index(index, node_ref_lists=0, key_length=1,
1416                           key_count=90000, row_lengths=[1, 9, 990],
1417                           cached_offsets=[0, 5, 500])
1418        return index
1419
1420    def assertNumPages(self, expected_pages, index, size):
1421        index._size = size
1422        self.assertEqual(expected_pages, index._compute_total_pages_in_index())
1423
1424    def assertExpandOffsets(self, expected, index, offsets):
1425        self.assertEqual(expected, index._expand_offsets(offsets),
1426                         'We did not get the expected value after expanding'
1427                         ' %s' % (offsets,))
1428
1429    def test_default_recommended_pages(self):
1430        index = self.make_index(None)
1431        # local transport recommends 4096 byte reads, which is 1 page
1432        self.assertEqual(1, index._recommended_pages)
1433
1434    def test__compute_total_pages_in_index(self):
1435        index = self.make_index(None)
1436        self.assertNumPages(1, index, 1024)
1437        self.assertNumPages(1, index, 4095)
1438        self.assertNumPages(1, index, 4096)
1439        self.assertNumPages(2, index, 4097)
1440        self.assertNumPages(2, index, 8192)
1441        self.assertNumPages(76, index, 4096 * 75 + 10)
1442
1443    def test__find_layer_start_and_stop(self):
1444        index = self.make_1000_node_index()
1445        self.assertEqual((0, 1), index._find_layer_first_and_end(0))
1446        self.assertEqual((1, 10), index._find_layer_first_and_end(1))
1447        self.assertEqual((1, 10), index._find_layer_first_and_end(9))
1448        self.assertEqual((10, 1000), index._find_layer_first_and_end(10))
1449        self.assertEqual((10, 1000), index._find_layer_first_and_end(99))
1450        self.assertEqual((10, 1000), index._find_layer_first_and_end(999))
1451
1452    def test_unknown_size(self):
1453        # We should not expand if we don't know the file size
1454        index = self.make_index(None, 10)
1455        self.assertExpandOffsets([0], index, [0])
1456        self.assertExpandOffsets([1, 4, 9], index, [1, 4, 9])
1457
1458    def test_more_than_recommended(self):
1459        index = self.make_index(4096 * 100, 2)
1460        self.assertExpandOffsets([1, 10], index, [1, 10])
1461        self.assertExpandOffsets([1, 10, 20], index, [1, 10, 20])
1462
1463    def test_read_all_from_root(self):
1464        index = self.make_index(4096 * 10, 20)
1465        self.assertExpandOffsets(list(range(10)), index, [0])
1466
1467    def test_read_all_when_cached(self):
1468        # We've read enough that we can grab all the rest in a single request
1469        index = self.make_index(4096 * 10, 5)
1470        self.prepare_index(index, node_ref_lists=0, key_length=1,
1471                           key_count=1000, row_lengths=[1, 9],
1472                           cached_offsets=[0, 1, 2, 5, 6])
1473        # It should fill the remaining nodes, regardless of the one requested
1474        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [3])
1475        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [8])
1476        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [9])
1477
1478    def test_no_root_node(self):
1479        index = self.make_index(4096 * 10, 5)
1480        self.assertExpandOffsets([0], index, [0])
1481
1482    def test_include_neighbors(self):
1483        index = self.make_100_node_index()
1484        # We expand in both directions, until we have at least 'recommended'
1485        # pages
1486        self.assertExpandOffsets([9, 10, 11, 12, 13, 14, 15], index, [12])
1487        self.assertExpandOffsets([88, 89, 90, 91, 92, 93, 94], index, [91])
1488        # If we hit an 'edge' we continue in the other direction
1489        self.assertExpandOffsets([1, 2, 3, 4, 5, 6], index, [2])
1490        self.assertExpandOffsets([94, 95, 96, 97, 98, 99], index, [98])
1491
1492        # Requesting many nodes will expand all locations equally
1493        self.assertExpandOffsets([1, 2, 3, 80, 81, 82], index, [2, 81])
1494        self.assertExpandOffsets([1, 2, 3, 9, 10, 11, 80, 81, 82], index,
1495                                 [2, 10, 81])
1496
1497    def test_stop_at_cached(self):
1498        index = self.make_100_node_index()
1499        self.set_cached_offsets(index, [0, 10, 19])
1500        self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [11])
1501        self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [12])
1502        self.assertExpandOffsets([12, 13, 14, 15, 16, 17, 18], index, [15])
1503        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [16])
1504        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [17])
1505        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [18])
1506
1507    def test_cannot_fully_expand(self):
1508        index = self.make_100_node_index()
1509        self.set_cached_offsets(index, [0, 10, 12])
1510        # We don't go into an endless loop if we are bound by cached nodes
1511        self.assertExpandOffsets([11], index, [11])
1512
1513    def test_overlap(self):
1514        index = self.make_100_node_index()
1515        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [12, 13])
1516        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [11, 14])
1517
1518    def test_stay_within_layer(self):
1519        index = self.make_1000_node_index()
1520        # When expanding a request, we won't read nodes from the next layer
1521        self.assertExpandOffsets([1, 2, 3, 4], index, [2])
1522        self.assertExpandOffsets([6, 7, 8, 9], index, [6])
1523        self.assertExpandOffsets([6, 7, 8, 9], index, [9])
1524        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [10])
1525        self.assertExpandOffsets([10, 11, 12, 13, 14, 15, 16], index, [13])
1526
1527        self.set_cached_offsets(index, [0, 4, 12])
1528        self.assertExpandOffsets([5, 6, 7, 8, 9], index, [7])
1529        self.assertExpandOffsets([10, 11], index, [11])
1530
1531    def test_small_requests_unexpanded(self):
1532        index = self.make_100_node_index()
1533        self.set_cached_offsets(index, [0])
1534        self.assertExpandOffsets([1], index, [1])
1535        self.assertExpandOffsets([50], index, [50])
1536        # If we request more than one node, then we'll expand
1537        self.assertExpandOffsets([49, 50, 51, 59, 60, 61], index, [50, 60])
1538
1539        # The first pass does not expand
1540        index = self.make_1000_node_index()
1541        self.set_cached_offsets(index, [0])
1542        self.assertExpandOffsets([1], index, [1])
1543        self.set_cached_offsets(index, [0, 1])
1544        self.assertExpandOffsets([100], index, [100])
1545        self.set_cached_offsets(index, [0, 1, 100])
1546        # But after the first depth, we will expand
1547        self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [2])
1548        self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [4])
1549        self.set_cached_offsets(index, [0, 1, 2, 3, 4, 5, 6, 7, 100])
1550        self.assertExpandOffsets([102, 103, 104, 105, 106, 107, 108], index,
1551                                 [105])
1552