1# Copyright (C) 2010 Canonical Ltd 2# 3# This program is free software; you can redistribute it and/or modify 4# it under the terms of the GNU General Public License as published by 5# the Free Software Foundation; either version 2 of the License, or 6# (at your option) any later version. 7# 8# This program is distributed in the hope that it will be useful, 9# but WITHOUT ANY WARRANTY; without even the implied warranty of 10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11# GNU General Public License for more details. 12# 13# You should have received a copy of the GNU General Public License 14# along with this program; if not, write to the Free Software 15# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 16# 17 18"""Direct tests of the btree serializer extension""" 19 20import binascii 21import bisect 22 23from ... import tests 24 25from .test_btree_index import compiled_btreeparser_feature 26 27 28class TestBtreeSerializer(tests.TestCase): 29 30 _test_needs_features = [compiled_btreeparser_feature] 31 32 @property 33 def module(self): 34 return compiled_btreeparser_feature.module 35 36 37class TestHexAndUnhex(TestBtreeSerializer): 38 39 def assertHexlify(self, as_binary): 40 self.assertEqual(binascii.hexlify(as_binary), 41 self.module._py_hexlify(as_binary)) 42 43 def assertUnhexlify(self, as_hex): 44 ba_unhex = binascii.unhexlify(as_hex) 45 mod_unhex = self.module._py_unhexlify(as_hex) 46 if ba_unhex != mod_unhex: 47 if mod_unhex is None: 48 mod_hex = b'<None>' 49 else: 50 mod_hex = binascii.hexlify(mod_unhex) 51 self.fail('_py_unhexlify returned a different answer' 52 ' from binascii:\n %r\n != %r' 53 % (binascii.hexlify(ba_unhex), mod_hex)) 54 55 def assertFailUnhexlify(self, as_hex): 56 # Invalid hex content 57 self.assertIs(None, self.module._py_unhexlify(as_hex)) 58 59 def test_to_hex(self): 60 raw_bytes = bytes(range(256)) 61 for i in range(0, 240, 20): 62 self.assertHexlify(raw_bytes[i:i + 20]) 63 self.assertHexlify(raw_bytes[240:] + raw_bytes[0:4]) 64 65 def test_from_hex(self): 66 self.assertUnhexlify(b'0123456789abcdef0123456789abcdef01234567') 67 self.assertUnhexlify(b'123456789abcdef0123456789abcdef012345678') 68 self.assertUnhexlify(b'0123456789ABCDEF0123456789ABCDEF01234567') 69 self.assertUnhexlify(b'123456789ABCDEF0123456789ABCDEF012345678') 70 hex_chars = binascii.hexlify(bytes(range(256))) 71 for i in range(0, 480, 40): 72 self.assertUnhexlify(hex_chars[i:i + 40]) 73 self.assertUnhexlify(hex_chars[480:] + hex_chars[0:8]) 74 75 def test_from_invalid_hex(self): 76 self.assertFailUnhexlify(b'123456789012345678901234567890123456789X') 77 self.assertFailUnhexlify(b'12345678901234567890123456789012345678X9') 78 79 def test_bad_argument(self): 80 self.assertRaises(ValueError, self.module._py_unhexlify, u'1a') 81 self.assertRaises(ValueError, self.module._py_unhexlify, b'1b') 82 83 84_hex_form = b'123456789012345678901234567890abcdefabcd' 85 86 87class Test_KeyToSha1(TestBtreeSerializer): 88 89 def assertKeyToSha1(self, expected, key): 90 if expected is None: 91 expected_bin = None 92 else: 93 expected_bin = binascii.unhexlify(expected) 94 actual_sha1 = self.module._py_key_to_sha1(key) 95 if expected_bin != actual_sha1: 96 actual_hex_sha1 = None 97 if actual_sha1 is not None: 98 actual_hex_sha1 = binascii.hexlify(actual_sha1) 99 self.fail('_key_to_sha1 returned:\n %s\n != %s' 100 % (actual_sha1, expected)) 101 102 def test_simple(self): 103 self.assertKeyToSha1(_hex_form, (b'sha1:' + _hex_form,)) 104 105 def test_invalid_not_tuple(self): 106 self.assertKeyToSha1(None, _hex_form) 107 self.assertKeyToSha1(None, b'sha1:' + _hex_form) 108 109 def test_invalid_empty(self): 110 self.assertKeyToSha1(None, ()) 111 112 def test_invalid_not_string(self): 113 self.assertKeyToSha1(None, (None,)) 114 self.assertKeyToSha1(None, (list(_hex_form),)) 115 116 def test_invalid_not_sha1(self): 117 self.assertKeyToSha1(None, (_hex_form,)) 118 self.assertKeyToSha1(None, (b'sha2:' + _hex_form,)) 119 120 def test_invalid_not_hex(self): 121 self.assertKeyToSha1(None, 122 (b'sha1:abcdefghijklmnopqrstuvwxyz12345678901234',)) 123 124 125class Test_Sha1ToKey(TestBtreeSerializer): 126 127 def assertSha1ToKey(self, hex_sha1): 128 bin_sha1 = binascii.unhexlify(hex_sha1) 129 key = self.module._py_sha1_to_key(bin_sha1) 130 self.assertEqual((b'sha1:' + hex_sha1,), key) 131 132 def test_simple(self): 133 self.assertSha1ToKey(_hex_form) 134 135 136_one_key_content = b"""type=leaf 137sha1:123456789012345678901234567890abcdefabcd\x00\x001 2 3 4 138""" 139 140_large_offsets = b"""type=leaf 141sha1:123456789012345678901234567890abcdefabcd\x00\x0012345678901 1234567890 0 1 142sha1:abcd123456789012345678901234567890abcdef\x00\x002147483648 2147483647 0 1 143sha1:abcdefabcd123456789012345678901234567890\x00\x004294967296 4294967295 4294967294 1 144""" 145 146_multi_key_content = b"""type=leaf 147sha1:c80c881d4a26984ddce795f6f71817c9cf4480e7\x00\x000 0 0 0 148sha1:c86f7e437faa5a7fce15d1ddcb9eaeaea377667b\x00\x001 1 1 1 149sha1:c8e240de74fb1ed08fa08d38063f6a6a91462a81\x00\x002 2 2 2 150sha1:cda39a3ee5e6b4b0d3255bfef95601890afd8070\x00\x003 3 3 3 151sha1:cdf51e37c269aa94d38f93e537bf6e2020b21406\x00\x004 4 4 4 152sha1:ce0c9035898dd52fc65c41454cec9c4d2611bfb3\x00\x005 5 5 5 153sha1:ce93b4e3c464ffd51732fbd6ded717e9efda28aa\x00\x006 6 6 6 154sha1:cf7a9e24777ec23212c54d7a350bc5bea5477fdb\x00\x007 7 7 7 155""" 156 157_multi_key_same_offset = b"""type=leaf 158sha1:080c881d4a26984ddce795f6f71817c9cf4480e7\x00\x000 0 0 0 159sha1:c86f7e437faa5a7fce15d1ddcb9eaeaea377667b\x00\x001 1 1 1 160sha1:cd0c9035898dd52fc65c41454cec9c4d2611bfb3\x00\x002 2 2 2 161sha1:cda39a3ee5e6b4b0d3255bfef95601890afd8070\x00\x003 3 3 3 162sha1:cde240de74fb1ed08fa08d38063f6a6a91462a81\x00\x004 4 4 4 163sha1:cdf51e37c269aa94d38f93e537bf6e2020b21406\x00\x005 5 5 5 164sha1:ce7a9e24777ec23212c54d7a350bc5bea5477fdb\x00\x006 6 6 6 165sha1:ce93b4e3c464ffd51732fbd6ded717e9efda28aa\x00\x007 7 7 7 166""" 167 168_common_32_bits = b"""type=leaf 169sha1:123456784a26984ddce795f6f71817c9cf4480e7\x00\x000 0 0 0 170sha1:1234567874fb1ed08fa08d38063f6a6a91462a81\x00\x001 1 1 1 171sha1:12345678777ec23212c54d7a350bc5bea5477fdb\x00\x002 2 2 2 172sha1:123456787faa5a7fce15d1ddcb9eaeaea377667b\x00\x003 3 3 3 173sha1:12345678898dd52fc65c41454cec9c4d2611bfb3\x00\x004 4 4 4 174sha1:12345678c269aa94d38f93e537bf6e2020b21406\x00\x005 5 5 5 175sha1:12345678c464ffd51732fbd6ded717e9efda28aa\x00\x006 6 6 6 176sha1:12345678e5e6b4b0d3255bfef95601890afd8070\x00\x007 7 7 7 177""" 178 179 180class TestGCCKHSHA1LeafNode(TestBtreeSerializer): 181 182 def assertInvalid(self, data): 183 """Ensure that we get a proper error when trying to parse invalid bytes. 184 185 (mostly this is testing that bad input doesn't cause us to segfault) 186 """ 187 self.assertRaises( 188 (ValueError, TypeError), self.module._parse_into_chk, data, 1, 0) 189 190 def test_non_bytes(self): 191 self.assertInvalid(u'type=leaf\n') 192 193 def test_not_leaf(self): 194 self.assertInvalid(b'type=internal\n') 195 196 def test_empty_leaf(self): 197 leaf = self.module._parse_into_chk(b'type=leaf\n', 1, 0) 198 self.assertEqual(0, len(leaf)) 199 self.assertEqual([], leaf.all_items()) 200 self.assertEqual([], leaf.all_keys()) 201 # It should allow any key to be queried 202 self.assertFalse(('key',) in leaf) 203 204 def test_one_key_leaf(self): 205 leaf = self.module._parse_into_chk(_one_key_content, 1, 0) 206 self.assertEqual(1, len(leaf)) 207 sha_key = (b'sha1:' + _hex_form,) 208 self.assertEqual([sha_key], leaf.all_keys()) 209 self.assertEqual([(sha_key, (b'1 2 3 4', ()))], leaf.all_items()) 210 self.assertTrue(sha_key in leaf) 211 212 def test_large_offsets(self): 213 leaf = self.module._parse_into_chk(_large_offsets, 1, 0) 214 self.assertEqual([b'12345678901 1234567890 0 1', 215 b'2147483648 2147483647 0 1', 216 b'4294967296 4294967295 4294967294 1', 217 ], [x[1][0] for x in leaf.all_items()]) 218 219 def test_many_key_leaf(self): 220 leaf = self.module._parse_into_chk(_multi_key_content, 1, 0) 221 self.assertEqual(8, len(leaf)) 222 all_keys = leaf.all_keys() 223 self.assertEqual(8, len(leaf.all_keys())) 224 for idx, key in enumerate(all_keys): 225 self.assertEqual(b'%d' % idx, leaf[key][0].split()[0]) 226 227 def test_common_shift(self): 228 # The keys were deliberately chosen so that the first 5 bits all 229 # overlapped, it also happens that a later bit overlaps 230 # Note that by 'overlap' we mean that given bit is either on in all 231 # keys, or off in all keys 232 leaf = self.module._parse_into_chk(_multi_key_content, 1, 0) 233 self.assertEqual(19, leaf.common_shift) 234 # The interesting byte for each key is 235 # (defined as the 8-bits that come after the common prefix) 236 lst = [1, 13, 28, 180, 190, 193, 210, 239] 237 offsets = leaf._get_offsets() 238 self.assertEqual([bisect.bisect_left(lst, x) for x in range(0, 257)], 239 offsets) 240 for idx, val in enumerate(lst): 241 self.assertEqual(idx, offsets[val]) 242 for idx, key in enumerate(leaf.all_keys()): 243 self.assertEqual(b'%d' % idx, leaf[key][0].split()[0]) 244 245 def test_multi_key_same_offset(self): 246 # there is no common prefix, though there are some common bits 247 leaf = self.module._parse_into_chk(_multi_key_same_offset, 1, 0) 248 self.assertEqual(24, leaf.common_shift) 249 offsets = leaf._get_offsets() 250 # The interesting byte is just the first 8-bits of the key 251 lst = [8, 200, 205, 205, 205, 205, 206, 206] 252 self.assertEqual([bisect.bisect_left(lst, x) for x in range(0, 257)], 253 offsets) 254 for val in lst: 255 self.assertEqual(lst.index(val), offsets[val]) 256 for idx, key in enumerate(leaf.all_keys()): 257 self.assertEqual(b'%d' % idx, leaf[key][0].split()[0]) 258 259 def test_all_common_prefix(self): 260 # The first 32 bits of all hashes are the same. This is going to be 261 # pretty much impossible, but I don't want to fail because of this 262 leaf = self.module._parse_into_chk(_common_32_bits, 1, 0) 263 self.assertEqual(0, leaf.common_shift) 264 lst = [0x78] * 8 265 offsets = leaf._get_offsets() 266 self.assertEqual([bisect.bisect_left(lst, x) for x in range(0, 257)], 267 offsets) 268 for val in lst: 269 self.assertEqual(lst.index(val), offsets[val]) 270 for idx, key in enumerate(leaf.all_keys()): 271 self.assertEqual(b'%d' % idx, leaf[key][0].split()[0]) 272 273 def test_many_entries(self): 274 # Again, this is almost impossible, but we should still work 275 # It would be hard to fit more that 120 entries in a 4k page, much less 276 # more than 256 of them. but hey, weird stuff happens sometimes 277 lines = [b'type=leaf\n'] 278 for i in range(500): 279 key_str = b'sha1:%04x%s' % (i, _hex_form[:36]) 280 key = (key_str,) 281 lines.append(b'%s\0\0%d %d %d %d\n' % (key_str, i, i, i, i)) 282 data = b''.join(lines) 283 leaf = self.module._parse_into_chk(data, 1, 0) 284 self.assertEqual(24 - 7, leaf.common_shift) 285 offsets = leaf._get_offsets() 286 # This is the interesting bits for each entry 287 lst = [x // 2 for x in range(500)] 288 expected_offsets = [x * 2 for x in range(128)] + [255] * 129 289 self.assertEqual(expected_offsets, offsets) 290 # We truncate because offsets is an unsigned char. So the bisection 291 # will just say 'greater than the last one' for all the rest 292 lst = lst[:255] 293 self.assertEqual([bisect.bisect_left(lst, x) for x in range(0, 257)], 294 offsets) 295 for val in lst: 296 self.assertEqual(lst.index(val), offsets[val]) 297 for idx, key in enumerate(leaf.all_keys()): 298 self.assertEqual(b'%d' % idx, leaf[key][0].split()[0]) 299 300 def test__sizeof__(self): 301 # We can't use the exact numbers because of platform variations, etc. 302 # But what we really care about is that it does get bigger with more 303 # content. 304 leaf0 = self.module._parse_into_chk(b'type=leaf\n', 1, 0) 305 leaf1 = self.module._parse_into_chk(_one_key_content, 1, 0) 306 leafN = self.module._parse_into_chk(_multi_key_content, 1, 0) 307 sizeof_1 = leaf1.__sizeof__() - leaf0.__sizeof__() 308 self.assertTrue(sizeof_1 > 0) 309 sizeof_N = leafN.__sizeof__() - leaf0.__sizeof__() 310 self.assertEqual(sizeof_1 * len(leafN), sizeof_N) 311