1 /**
2  * A cascading Bloom filter
3  * Copyright 2013 Shaun Jackman
4  */
5 #ifndef CascadingBLOOMFILTER_H
6 #define CascadingBLOOMFILTER_H 1
7 
8 #include "Bloom/Bloom.h"
9 #include "BloomFilter.h"
10 #include <vector>
11 
12 /** A Cascading Bloom filter. */
13 class CascadingBloomFilter
14 {
15   public:
16 
17 	/** Constructor */
CascadingBloomFilter()18 	CascadingBloomFilter() {}
19 
20 	/** Constructor */
m_hashSeed(hashSeed)21 	CascadingBloomFilter(size_t n, size_t max_count, size_t hashSeed=0) : m_hashSeed(hashSeed)
22 	{
23 		m_data.reserve(max_count);
24 		for (unsigned i = 0; i < max_count; i++)
25 			m_data.push_back(new Konnector::BloomFilter(n, hashSeed));
26 	}
27 
28 	/** Destructor */
~CascadingBloomFilter()29 	~CascadingBloomFilter()
30 	{
31 		typedef std::vector<Konnector::BloomFilter*>::iterator Iterator;
32 		for (Iterator i = m_data.begin(); i != m_data.end(); i++) {
33 			assert(*i != NULL);
34 			delete *i;
35 		}
36 	}
37 
38 	/** Return the size of the bit array. */
size()39 	size_t size() const
40 	{
41 		assert(m_data.back() != NULL);
42 		return m_data.back()->size();
43 	}
44 
45 	/** Return the number of elements with count >= max_count. */
popcount()46 	size_t popcount() const
47 	{
48 		assert(m_data.back() != NULL);
49 		return m_data.back()->popcount();
50 	}
51 
52 	/** Return the estimated false positive rate */
FPR()53 	double FPR() const
54 	{
55 		return (double)popcount() / size();
56 	}
57 
58 	/** Return whether the element with this index has count >=
59 	 * max_count.
60 	 */
61 	bool operator[](size_t i) const
62 	{
63 		assert(m_data.back() != NULL);
64 		return (*m_data.back())[i];
65 	}
66 
67 	/** Return whether this element has count >= max_count. */
68 	bool operator[](const Bloom::key_type& key) const
69 	{
70 		assert(m_data.back() != NULL);
71 		return (*m_data.back())[Bloom::hash(key, m_hashSeed) % m_data.back()->size()];
72 	}
73 
74 	/** Add the object with the specified index to this multiset. */
insert(size_t index)75 	void insert(size_t index)
76 	{
77 		for (unsigned i = 0; i < m_data.size(); ++i) {
78 			assert(m_data.at(i) != NULL);
79 			if (!(*m_data[i])[index]) {
80 				m_data[i]->insert(index);
81 				break;
82 			}
83 		}
84 	}
85 
86 	/** Add the object to this Cascading multiset. */
insert(const Bloom::key_type & key)87 	void insert(const Bloom::key_type& key)
88 	{
89 		assert(m_data.back() != NULL);
90 		insert(Bloom::hash(key, m_hashSeed) % m_data.back()->size());
91 	}
92 
93 	/** Get the Bloom filter for a given level */
getBloomFilter(unsigned level)94 	Konnector::BloomFilter& getBloomFilter(unsigned level)
95 	{
96 		assert(m_data.at(level) != NULL);
97 		return *m_data.at(level);
98 	}
99 
write(std::ostream & out)100 	void write(std::ostream& out) const
101 	{
102 		assert(m_data.back() != NULL);
103 		out << *m_data.back();
104 	}
105 
106 	/** Operator for writing the bloom filter to a stream */
107 	friend std::ostream& operator<<(std::ostream& out, const CascadingBloomFilter& o)
108 	{
109 		o.write(out);
110 		return out;
111 	}
112 
113   private:
114 	size_t m_hashSeed;
115 	std::vector<Konnector::BloomFilter*> m_data;
116 
117 };
118 
119 #endif
120