1# Copyright (C) 2013-2014 The python-bitcoinlib developers
2#
3# This file is part of python-bitcoinlib.
4#
5# It is subject to the license terms in the LICENSE file found in the top-level
6# directory of this distribution.
7#
8# No part of python-bitcoinlib, including this file, may be copied, modified,
9# propagated, or distributed except according to the terms contained in the
10# LICENSE file.
11
12"""Bloom filter support"""
13
14from __future__ import absolute_import, division, print_function, unicode_literals
15
16import struct
17import sys
18import math
19
20import bitcoin.core
21import bitcoin.core.serialize
22
23def _ROTL32(x, r):
24    assert x <= 0xFFFFFFFF
25    return ((x << r) & 0xFFFFFFFF) | (x >> (32 - r))
26
27def MurmurHash3(nHashSeed, vDataToHash):
28    """MurmurHash3 (x86_32)
29
30    Used for bloom filters. See http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
31    """
32
33    assert nHashSeed <= 0xFFFFFFFF
34
35    h1 = nHashSeed
36    c1 = 0xcc9e2d51
37    c2 = 0x1b873593
38
39    # body
40    i = 0
41    while (i < len(vDataToHash) - len(vDataToHash) % 4
42           and len(vDataToHash) - i >= 4):
43
44        k1 = struct.unpack(b"<L", vDataToHash[i:i+4])[0]
45
46        k1 = (k1 * c1) & 0xFFFFFFFF
47        k1 = _ROTL32(k1, 15)
48        k1 = (k1 * c2) & 0xFFFFFFFF
49
50        h1 ^= k1
51        h1 = _ROTL32(h1, 13)
52        h1 = (((h1*5) & 0xFFFFFFFF) + 0xe6546b64) & 0xFFFFFFFF
53
54        i += 4
55
56    # tail
57    k1 = 0
58    j = (len(vDataToHash) // 4) * 4
59    bord = ord
60    if sys.version > '3':
61        # In Py3 indexing bytes returns numbers, not characters
62        bord = lambda x: x
63    if len(vDataToHash) & 3 >= 3:
64        k1 ^= bord(vDataToHash[j+2]) << 16
65    if len(vDataToHash) & 3 >= 2:
66        k1 ^= bord(vDataToHash[j+1]) << 8
67    if len(vDataToHash) & 3 >= 1:
68        k1 ^= bord(vDataToHash[j])
69
70    k1 &= 0xFFFFFFFF
71    k1 = (k1 * c1) & 0xFFFFFFFF
72    k1 = _ROTL32(k1, 15)
73    k1 = (k1 * c2) & 0xFFFFFFFF
74    h1 ^= k1
75
76    # finalization
77    h1 ^= len(vDataToHash) & 0xFFFFFFFF
78    h1 ^= (h1 & 0xFFFFFFFF) >> 16
79    h1 *= 0x85ebca6b
80    h1 ^= (h1 & 0xFFFFFFFF) >> 13
81    h1 *= 0xc2b2ae35
82    h1 ^= (h1 & 0xFFFFFFFF) >> 16
83
84    return h1 & 0xFFFFFFFF
85
86
87class CBloomFilter(bitcoin.core.serialize.Serializable):
88    # 20,000 items with fp rate < 0.1% or 10,000 items and <0.0001%
89    MAX_BLOOM_FILTER_SIZE = 36000
90    MAX_HASH_FUNCS = 50
91
92    UPDATE_NONE = 0
93    UPDATE_ALL = 1
94    UPDATE_P2PUBKEY_ONLY = 2
95    UPDATE_MASK = 3
96
97    def __init__(self, nElements, nFPRate, nTweak, nFlags):
98        """Create a new bloom filter
99
100        The filter will have a given false-positive rate when filled with the
101        given number of elements.
102
103        Note that if the given parameters will result in a filter outside the
104        bounds of the protocol limits, the filter created will be as close to
105        the given parameters as possible within the protocol limits. This will
106        apply if nFPRate is very low or nElements is unreasonably high.
107
108        nTweak is a constant which is added to the seed value passed to the
109        hash function It should generally always be a random value (and is
110        largely only exposed for unit testing)
111
112        nFlags should be one of the UPDATE_* enums (but not _MASK)
113        """
114        LN2SQUARED = 0.4804530139182014246671025263266649717305529515945455
115        LN2 = 0.6931471805599453094172321214581765680755001343602552
116        self.vData = bytearray(int(min(-1  / LN2SQUARED * nElements * math.log(nFPRate), self.MAX_BLOOM_FILTER_SIZE * 8) / 8))
117        self.nHashFuncs = int(min(len(self.vData) * 8 / nElements * LN2, self.MAX_HASH_FUNCS))
118        self.nTweak = nTweak
119        self.nFlags = nFlags
120
121    def bloom_hash(self, nHashNum, vDataToHash):
122        return MurmurHash3(((nHashNum * 0xFBA4C795) + self.nTweak) & 0xFFFFFFFF, vDataToHash) % (len(self.vData) * 8)
123
124    __bit_mask = bytearray([0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80])
125
126    def insert(self, elem):
127        """Insert an element in the filter.
128
129        elem may be a COutPoint or bytes
130        """
131        if isinstance(elem, bitcoin.core.COutPoint):
132            elem = elem.serialize()
133
134        if len(self.vData) == 1 and self.vData[0] == 0xff:
135            return
136
137        for i in range(0, self.nHashFuncs):
138            nIndex = self.bloom_hash(i, elem)
139            # Sets bit nIndex of vData
140            self.vData[nIndex >> 3] |= self.__bit_mask[7 & nIndex]
141
142    def contains(self, elem):
143        """Test if the filter contains an element
144
145        elem may be a COutPoint or bytes
146        """
147        if isinstance(elem, bitcoin.core.COutPoint):
148            elem = elem.serialize()
149
150        if len(self.vData) == 1 and self.vData[0] == 0xff:
151            return True
152
153        for i in range(0, self.nHashFuncs):
154            nIndex = self.bloom_hash(i, elem)
155            if not (self.vData[nIndex >> 3] & self.__bit_mask[7 & nIndex]):
156                return False
157        return True
158
159    def IsWithinSizeConstraints(self):
160        return len(self.vData) <= self.MAX_BLOOM_FILTER_SIZE and self.nHashFuncs <= self.MAX_HASH_FUNCS
161
162    def IsRelevantAndUpdate(tx, tx_hash):
163        # Not useful for a client, so not implemented yet.
164        raise NotImplementedError
165
166    __struct = struct.Struct(b'<IIB')
167
168    @classmethod
169    def stream_deserialize(cls, f):
170        vData = bytearray(bitcoin.core.serialize.BytesSerializer.stream_deserialize(f))
171        (nHashFuncs,
172         nTweak,
173         nFlags) = CBloomFilter.__struct.unpack(bitcoin.core.ser_read(f, CBloomFilter.__struct.size))
174        # These arguments can be fake, the real values are set just after
175        deserialized = cls(1, 0.01, 0, CBloomFilter.UPDATE_ALL)
176        deserialized.vData = vData
177        deserialized.nHashFuncs = nHashFuncs
178        deserialized.nTweak = nTweak
179        deserialized.nFlags = nFlags
180        return deserialized
181
182    def stream_serialize(self, f):
183        if sys.version > '3':
184            bitcoin.core.serialize.BytesSerializer.stream_serialize(self.vData, f)
185        else:
186            # 2.7 has problems with f.write(bytearray())
187            bitcoin.core.serialize.BytesSerializer.stream_serialize(bytes(self.vData), f)
188        f.write(self.__struct.pack(self.nHashFuncs, self.nTweak, self.nFlags))
189
190__all__ = (
191        'MurmurHash3',
192        'CBloomFilter',
193)
194