1 /** 2 * A cascading Bloom filter 3 * Copyright 2013 Shaun Jackman 4 */ 5 #ifndef CascadingBLOOMFILTER_H 6 #define CascadingBLOOMFILTER_H 1 7 8 #include "Bloom/Bloom.h" 9 #include "BloomFilter.h" 10 #include <vector> 11 12 /** A Cascading Bloom filter. */ 13 class CascadingBloomFilter 14 { 15 public: 16 17 /** Constructor */ CascadingBloomFilter()18 CascadingBloomFilter() {} 19 20 /** Constructor */ m_hashSeed(hashSeed)21 CascadingBloomFilter(size_t n, size_t max_count, size_t hashSeed=0) : m_hashSeed(hashSeed) 22 { 23 m_data.reserve(max_count); 24 for (unsigned i = 0; i < max_count; i++) 25 m_data.push_back(new Konnector::BloomFilter(n, hashSeed)); 26 } 27 28 /** Destructor */ ~CascadingBloomFilter()29 ~CascadingBloomFilter() 30 { 31 typedef std::vector<Konnector::BloomFilter*>::iterator Iterator; 32 for (Iterator i = m_data.begin(); i != m_data.end(); i++) { 33 assert(*i != NULL); 34 delete *i; 35 } 36 } 37 38 /** Return the size of the bit array. */ size()39 size_t size() const 40 { 41 assert(m_data.back() != NULL); 42 return m_data.back()->size(); 43 } 44 45 /** Return the number of elements with count >= max_count. */ popcount()46 size_t popcount() const 47 { 48 assert(m_data.back() != NULL); 49 return m_data.back()->popcount(); 50 } 51 52 /** Return the estimated false positive rate */ FPR()53 double FPR() const 54 { 55 return (double)popcount() / size(); 56 } 57 58 /** Return whether the element with this index has count >= 59 * max_count. 60 */ 61 bool operator[](size_t i) const 62 { 63 assert(m_data.back() != NULL); 64 return (*m_data.back())[i]; 65 } 66 67 /** Return whether this element has count >= max_count. */ 68 bool operator[](const Bloom::key_type& key) const 69 { 70 assert(m_data.back() != NULL); 71 return (*m_data.back())[Bloom::hash(key, m_hashSeed) % m_data.back()->size()]; 72 } 73 74 /** Add the object with the specified index to this multiset. */ insert(size_t index)75 void insert(size_t index) 76 { 77 for (unsigned i = 0; i < m_data.size(); ++i) { 78 assert(m_data.at(i) != NULL); 79 if (!(*m_data[i])[index]) { 80 m_data[i]->insert(index); 81 break; 82 } 83 } 84 } 85 86 /** Add the object to this Cascading multiset. */ insert(const Bloom::key_type & key)87 void insert(const Bloom::key_type& key) 88 { 89 assert(m_data.back() != NULL); 90 insert(Bloom::hash(key, m_hashSeed) % m_data.back()->size()); 91 } 92 93 /** Get the Bloom filter for a given level */ getBloomFilter(unsigned level)94 Konnector::BloomFilter& getBloomFilter(unsigned level) 95 { 96 assert(m_data.at(level) != NULL); 97 return *m_data.at(level); 98 } 99 write(std::ostream & out)100 void write(std::ostream& out) const 101 { 102 assert(m_data.back() != NULL); 103 out << *m_data.back(); 104 } 105 106 /** Operator for writing the bloom filter to a stream */ 107 friend std::ostream& operator<<(std::ostream& out, const CascadingBloomFilter& o) 108 { 109 o.write(out); 110 return out; 111 } 112 113 private: 114 size_t m_hashSeed; 115 std::vector<Konnector::BloomFilter*> m_data; 116 117 }; 118 119 #endif 120