1# Copyright (C) 2013-2014 The python-bitcoinlib developers 2# 3# This file is part of python-bitcoinlib. 4# 5# It is subject to the license terms in the LICENSE file found in the top-level 6# directory of this distribution. 7# 8# No part of python-bitcoinlib, including this file, may be copied, modified, 9# propagated, or distributed except according to the terms contained in the 10# LICENSE file. 11 12"""Bloom filter support""" 13 14from __future__ import absolute_import, division, print_function, unicode_literals 15 16import struct 17import sys 18import math 19 20import bitcoin.core 21import bitcoin.core.serialize 22 23def _ROTL32(x, r): 24 assert x <= 0xFFFFFFFF 25 return ((x << r) & 0xFFFFFFFF) | (x >> (32 - r)) 26 27def MurmurHash3(nHashSeed, vDataToHash): 28 """MurmurHash3 (x86_32) 29 30 Used for bloom filters. See http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp 31 """ 32 33 assert nHashSeed <= 0xFFFFFFFF 34 35 h1 = nHashSeed 36 c1 = 0xcc9e2d51 37 c2 = 0x1b873593 38 39 # body 40 i = 0 41 while (i < len(vDataToHash) - len(vDataToHash) % 4 42 and len(vDataToHash) - i >= 4): 43 44 k1 = struct.unpack(b"<L", vDataToHash[i:i+4])[0] 45 46 k1 = (k1 * c1) & 0xFFFFFFFF 47 k1 = _ROTL32(k1, 15) 48 k1 = (k1 * c2) & 0xFFFFFFFF 49 50 h1 ^= k1 51 h1 = _ROTL32(h1, 13) 52 h1 = (((h1*5) & 0xFFFFFFFF) + 0xe6546b64) & 0xFFFFFFFF 53 54 i += 4 55 56 # tail 57 k1 = 0 58 j = (len(vDataToHash) // 4) * 4 59 bord = ord 60 if sys.version > '3': 61 # In Py3 indexing bytes returns numbers, not characters 62 bord = lambda x: x 63 if len(vDataToHash) & 3 >= 3: 64 k1 ^= bord(vDataToHash[j+2]) << 16 65 if len(vDataToHash) & 3 >= 2: 66 k1 ^= bord(vDataToHash[j+1]) << 8 67 if len(vDataToHash) & 3 >= 1: 68 k1 ^= bord(vDataToHash[j]) 69 70 k1 &= 0xFFFFFFFF 71 k1 = (k1 * c1) & 0xFFFFFFFF 72 k1 = _ROTL32(k1, 15) 73 k1 = (k1 * c2) & 0xFFFFFFFF 74 h1 ^= k1 75 76 # finalization 77 h1 ^= len(vDataToHash) & 0xFFFFFFFF 78 h1 ^= (h1 & 0xFFFFFFFF) >> 16 79 h1 *= 0x85ebca6b 80 h1 ^= (h1 & 0xFFFFFFFF) >> 13 81 h1 *= 0xc2b2ae35 82 h1 ^= (h1 & 0xFFFFFFFF) >> 16 83 84 return h1 & 0xFFFFFFFF 85 86 87class CBloomFilter(bitcoin.core.serialize.Serializable): 88 # 20,000 items with fp rate < 0.1% or 10,000 items and <0.0001% 89 MAX_BLOOM_FILTER_SIZE = 36000 90 MAX_HASH_FUNCS = 50 91 92 UPDATE_NONE = 0 93 UPDATE_ALL = 1 94 UPDATE_P2PUBKEY_ONLY = 2 95 UPDATE_MASK = 3 96 97 def __init__(self, nElements, nFPRate, nTweak, nFlags): 98 """Create a new bloom filter 99 100 The filter will have a given false-positive rate when filled with the 101 given number of elements. 102 103 Note that if the given parameters will result in a filter outside the 104 bounds of the protocol limits, the filter created will be as close to 105 the given parameters as possible within the protocol limits. This will 106 apply if nFPRate is very low or nElements is unreasonably high. 107 108 nTweak is a constant which is added to the seed value passed to the 109 hash function It should generally always be a random value (and is 110 largely only exposed for unit testing) 111 112 nFlags should be one of the UPDATE_* enums (but not _MASK) 113 """ 114 LN2SQUARED = 0.4804530139182014246671025263266649717305529515945455 115 LN2 = 0.6931471805599453094172321214581765680755001343602552 116 self.vData = bytearray(int(min(-1 / LN2SQUARED * nElements * math.log(nFPRate), self.MAX_BLOOM_FILTER_SIZE * 8) / 8)) 117 self.nHashFuncs = int(min(len(self.vData) * 8 / nElements * LN2, self.MAX_HASH_FUNCS)) 118 self.nTweak = nTweak 119 self.nFlags = nFlags 120 121 def bloom_hash(self, nHashNum, vDataToHash): 122 return MurmurHash3(((nHashNum * 0xFBA4C795) + self.nTweak) & 0xFFFFFFFF, vDataToHash) % (len(self.vData) * 8) 123 124 __bit_mask = bytearray([0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80]) 125 126 def insert(self, elem): 127 """Insert an element in the filter. 128 129 elem may be a COutPoint or bytes 130 """ 131 if isinstance(elem, bitcoin.core.COutPoint): 132 elem = elem.serialize() 133 134 if len(self.vData) == 1 and self.vData[0] == 0xff: 135 return 136 137 for i in range(0, self.nHashFuncs): 138 nIndex = self.bloom_hash(i, elem) 139 # Sets bit nIndex of vData 140 self.vData[nIndex >> 3] |= self.__bit_mask[7 & nIndex] 141 142 def contains(self, elem): 143 """Test if the filter contains an element 144 145 elem may be a COutPoint or bytes 146 """ 147 if isinstance(elem, bitcoin.core.COutPoint): 148 elem = elem.serialize() 149 150 if len(self.vData) == 1 and self.vData[0] == 0xff: 151 return True 152 153 for i in range(0, self.nHashFuncs): 154 nIndex = self.bloom_hash(i, elem) 155 if not (self.vData[nIndex >> 3] & self.__bit_mask[7 & nIndex]): 156 return False 157 return True 158 159 def IsWithinSizeConstraints(self): 160 return len(self.vData) <= self.MAX_BLOOM_FILTER_SIZE and self.nHashFuncs <= self.MAX_HASH_FUNCS 161 162 def IsRelevantAndUpdate(tx, tx_hash): 163 # Not useful for a client, so not implemented yet. 164 raise NotImplementedError 165 166 __struct = struct.Struct(b'<IIB') 167 168 @classmethod 169 def stream_deserialize(cls, f): 170 vData = bytearray(bitcoin.core.serialize.BytesSerializer.stream_deserialize(f)) 171 (nHashFuncs, 172 nTweak, 173 nFlags) = CBloomFilter.__struct.unpack(bitcoin.core.ser_read(f, CBloomFilter.__struct.size)) 174 # These arguments can be fake, the real values are set just after 175 deserialized = cls(1, 0.01, 0, CBloomFilter.UPDATE_ALL) 176 deserialized.vData = vData 177 deserialized.nHashFuncs = nHashFuncs 178 deserialized.nTweak = nTweak 179 deserialized.nFlags = nFlags 180 return deserialized 181 182 def stream_serialize(self, f): 183 if sys.version > '3': 184 bitcoin.core.serialize.BytesSerializer.stream_serialize(self.vData, f) 185 else: 186 # 2.7 has problems with f.write(bytearray()) 187 bitcoin.core.serialize.BytesSerializer.stream_serialize(bytes(self.vData), f) 188 f.write(self.__struct.pack(self.nHashFuncs, self.nTweak, self.nFlags)) 189 190__all__ = ( 191 'MurmurHash3', 192 'CBloomFilter', 193) 194