1 /*  This file is part of Jellyfish.
2 
3     This work is dual-licensed under 3-Clause BSD License or GPL 3.0.
4     You can choose between one of them if you use this work.
5 
6 `SPDX-License-Identifier: BSD-3-Clause OR  GPL-3.0`
7 */
8 
9 #ifndef __JELLYFISH_BLOOM_COMMON_HPP__
10 #define __JELLYFISH_BLOOM_COMMON_HPP__
11 
12 #include <math.h>
13 #include <jellyfish/divisor.hpp>
14 #include <jellyfish/atomic_gcc.hpp>
15 
16 namespace jellyfish {
17 template<typename Key>
18 struct hash_pair { };
19 
20 template<typename Key, typename Derived, typename HashPair = hash_pair<Key> >
21 class bloom_base {
22 protected:
23   struct prefetch_info {
24     size_t         boff;
25     unsigned char* pos;
26   };
27 
28   // The number of bits in the structure, previously known as m_, is
29   // know stored as d_.d()
30   const jflib::divisor64 d_;
31   const unsigned long    k_;
32   unsigned char * const  data_;
33   HashPair               hash_fns_;
34 
35 public:
36   typedef Key key_type;
37 
bloom_base(size_t m,unsigned long k,unsigned char * ptr,const HashPair & fns=HashPair ())38   bloom_base(size_t m, unsigned long k, unsigned char* ptr, const HashPair& fns = HashPair()) :
39     d_(m), k_(k), data_(ptr), hash_fns_(fns)
40   { }
41 
42   bloom_base(const bloom_base& rhs) = delete;
bloom_base(bloom_base && rhs)43   bloom_base(bloom_base&& rhs) :
44     d_(rhs.d_), k_(rhs.k_), data_(rhs.data_), hash_fns_(std::move(rhs.hash_fns_))
45   { }
46 
47 
write_bits(std::ostream & out)48   void write_bits(std::ostream& out) {
49     out.write((char*)data_, static_cast<Derived*>(this)->nb_bytes());
50   }
51 
52   // Number of hash functions
k() const53   unsigned long k() const { return k_; }
54   // Size of bit vector
m() const55   size_t m() const { return d_.d(); }
hash_functions() const56   const HashPair& hash_functions() const { return hash_fns_; }
57 
58   static const double LOG2;
59   static const double LOG2_SQ;
60 
opt_m(const double fp,const size_t n)61   static size_t opt_m(const double fp, const size_t n) {
62     return n * (size_t)lrint(-log(fp) / LOG2_SQ);
63   }
opt_k(const double fp)64   static unsigned long opt_k(const double fp) {
65     return lrint(-log(fp) / LOG2);
66   }
67 
68   // Insert key k. Returns previous value of k
insert(const Key & k)69   unsigned int insert(const Key &k) {
70     uint64_t hashes[2];
71     hash_fns_(k, hashes);
72     return static_cast<Derived*>(this)->insert__(hashes);
73   }
74 
check(const Key & k) const75   unsigned int check(const Key &k) const {
76     uint64_t hashes[2];
77     hash_fns_(k, hashes);
78     return static_cast<const Derived*>(this)->check__(hashes);
79   }
80 
81 
82 
83   // Limited std::map interface compatibility
84   class element_proxy {
85     Derived&   bc_;
86     const Key& k_;
87 
88   public:
element_proxy(Derived & bc,const Key & k)89     element_proxy(Derived& bc, const Key& k) : bc_(bc), k_(k) { }
90 
operator ++()91     unsigned int operator++() {
92       unsigned int res = bc_.insert(k_);
93       return res == 0 ? 1 : 2;
94     }
95 
operator ++(int)96     unsigned int operator++(int) { return bc_.insert(k_); }
operator *() const97     unsigned int operator*() const { return bc_.check(k_); }
operator unsigned int() const98     operator unsigned int() const { return bc_.check(k_); }
99   };
100 
101   class const_element_proxy {
102     const Derived& bc_;
103     const Key&     k_;
104 
105   public:
const_element_proxy(const Derived & bc,const Key & k)106     const_element_proxy(const Derived& bc, const Key& k) : bc_(bc), k_(k) { }
107 
operator *() const108     unsigned int operator*() const { return bc_.check(k_); }
operator unsigned int() const109     operator unsigned int() const { return bc_.check(k_); }
110   };
operator [](const Key & k)111   element_proxy operator[](const Key& k) { return element_proxy(*static_cast<Derived*>(this), k); }
operator [](const Key & k) const112   const_element_proxy operator[](const Key& k) const { return const_element_proxy(*static_cast<const Derived*>(this), k); }
113 };
114 template<typename Key, typename Derived, typename HashPair>
115 const double bloom_base<Key, Derived, HashPair>::LOG2 = 0.6931471805599453;
116 template<typename Key, typename Derived, typename HashPair>
117 const double bloom_base<Key, Derived, HashPair>::LOG2_SQ = 0.4804530139182014;
118 
119 }
120 
121 #endif /* __JELLYFISH_BLOOM_COMMON_HPP__ */
122