1 /*  This file is part of Jellyfish.
2 
3     Jellyfish is free software: you can redistribute it and/or modify
4     it under the terms of the GNU General Public License as published by
5     the Free Software Foundation, either version 3 of the License, or
6     (at your option) any later version.
7 
8     Jellyfish is distributed in the hope that it will be useful,
9     but WITHOUT ANY WARRANTY; without even the implied warranty of
10     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11     GNU General Public License for more details.
12 
13     You should have received a copy of the GNU General Public License
14     along with Jellyfish.  If not, see <http://www.gnu.org/licenses/>.
15 */
16 
17 
18 #ifndef __BLOOM_COUNTER2_HPP__
19 #define __BLOOM_COUNTER2_HPP__
20 
21 #include <assert.h>
22 #include <math.h>
23 #include <limits.h>
24 #include <jellyfish/bloom_common.hpp>
25 #include <jellyfish/mapped_file.hpp>
26 #include <jellyfish/allocators_mmap.hpp>
27 #include <jellyfish/divisor.hpp>
28 #include <jellyfish/atomic_gcc.hpp>
29 #include <jellyfish/atomic_field.hpp>
30 
31 #include <jellyfish/err.hpp>
32 
33 #ifdef HAVE_CONFIG_H
34 #include <config.h>
35 #endif
36 
37 namespace jellyfish {
38 /* Bloom counter with 3 values: 0, 1 or 2. It is thread safe and lock free.
39  */
40 template<typename Key, typename HashPair = hash_pair<Key>, typename atomic_t = ::atomic::gcc>
41 class bloom_counter2_base : public bloom_base<Key, bloom_counter2_base<Key, HashPair, atomic_t>, HashPair> {
42   typedef bloom_base<Key, bloom_counter2_base<Key, HashPair, atomic_t>, HashPair> super;
43 
44   atomic_t atomic_;
45 
46 
47 protected:
nb_bytes__(size_t l)48   static size_t nb_bytes__(size_t l) {
49     return l / 5 + (l % 5 != 0);
50   }
51 
52 public:
bloom_counter2_base(size_t m,unsigned long k,unsigned char * ptr,const HashPair & fns=HashPair ())53   bloom_counter2_base(size_t m, unsigned long k, unsigned char* ptr, const HashPair& fns = HashPair()) :
54     super(m, k, ptr, fns)
55   { }
bloom_counter2_base(bloom_counter2_base && rhs)56   bloom_counter2_base(bloom_counter2_base&& rhs) :
57     super(std::move(rhs))
58   { }
nb_bytes() const59   size_t nb_bytes() const {
60     return nb_bytes__(super::d_.d());
61   }
62 
63   // Insert key with given hashes
insert__(const uint64_t * hashes)64   unsigned int insert__(const uint64_t* hashes) {
65     // Prefetch memory locations
66     static_assert(std::is_pod<typename super::prefetch_info>::value, "prefetch_info must be a POD");
67     typename super::prefetch_info pinfo[super::k_];
68     const size_t base = super::d_.remainder(hashes[0]);
69     const size_t inc  = super::d_.remainder(hashes[1]);
70     for(unsigned long i = 0; i < super::k_; ++i) {
71       const size_t p   = super::d_.remainder(base + i * inc);
72       const size_t off = p / 5;
73       pinfo[i].boff    = p % 5;
74       pinfo[i].pos     = super::data_ + off;
75       //      prefetch_write_no(pinfo[i].pos);
76       __builtin_prefetch(pinfo[i].pos, 1, 0);
77     }
78 
79     // Insert element
80     unsigned char res = 2;
81     for(unsigned long i = 0; i < super::k_; ++i) {
82       size_t        boff = pinfo[i].boff;
83       unsigned char v    = jflib::a_load(pinfo[i].pos);
84 
85       while(true) {
86         unsigned char w = v;
87         switch(boff) {
88         case 0:          break;
89         case 1: w /= 3;  break;
90         case 2: w /= 9;  break;
91         case 3: w /= 27; break;
92         case 4: w /= 81; break;
93         }
94         w = w % 3;
95         if(w == 2) break;
96         unsigned char nv = v;
97 
98         switch(boff) {
99         case 0: nv += 1;  break;
100         case 1: nv += 3;  break;
101         case 2: nv += 9;  break;
102         case 3: nv += 27; break;
103         case 4: nv += 81; break;
104         }
105         unsigned char cv = atomic_.cas(pinfo[i].pos, v, nv);
106         if(cv == v) {
107           if(w < res)
108             res = w;
109           break;
110         }
111         v = cv;
112       }
113     }
114     return res;
115   }
116 
check__(uint64_t * hashes) const117   unsigned int check__(uint64_t *hashes) const {
118     // Prefetch memory locations
119     static_assert(std::is_pod<typename super::prefetch_info>::value, "prefetch_info must be a POD");
120     typename super::prefetch_info pinfo[super::k_];
121     const size_t base = super::d_.remainder(hashes[0]);
122     const size_t inc  = super::d_.remainder(hashes[1]);
123     for(unsigned long i = 0; i < super::k_; ++i) {
124       const size_t p   = super::d_.remainder(base + i * inc);
125       const size_t off = p / 5;
126       pinfo[i].boff    = p % 5;
127       pinfo[i].pos     = super::data_ + off;
128       //      prefetch_read_no(pinfo[i].pos);
129       __builtin_prefetch(pinfo[i].pos, 0, 0);
130     }
131 
132     // Check element
133     unsigned char res = 2;
134     for(unsigned long i = 0; i < super::k_; ++i) {
135       size_t        boff = pinfo[i].boff;
136       unsigned char w    = jflib::a_load(pinfo[i].pos);
137 
138       switch(boff) {
139       case 0:          break;
140       case 1: w /= 3;  break;
141       case 2: w /= 9;  break;
142       case 3: w /= 27; break;
143       case 4: w /= 81; break;
144       }
145       w = w % 3;
146       if(w < res)
147         res = w;
148     }
149     return res;
150   }
151 };
152 
153 template<typename Key, typename HashPair = hash_pair<Key>, typename atomic_t = ::atomic::gcc,
154          typename mem_block_t = allocators::mmap>
155 class bloom_counter2:
156     protected mem_block_t,
157     public bloom_counter2_base<Key, HashPair, atomic_t>
158 {
159   typedef bloom_counter2_base<Key, HashPair, atomic_t> super;
160 
161 public:
162   typedef typename super::key_type key_type;
163 
bloom_counter2(const double fp,const size_t n,const HashPair & fns=HashPair ())164   bloom_counter2(const double fp, const size_t n, const HashPair& fns = HashPair()) :
165     mem_block_t(super::nb_bytes__(super::opt_m(fp, n))),
166     super(super::opt_m(fp, n), super::opt_k(fp), (unsigned char*)mem_block_t::get_ptr(), fns)
167   {
168     if(!mem_block_t::get_ptr())
169       throw std::runtime_error(err::msg() << "Failed to allocate " << super::nb_bytes__(super::opt_m(fp, n))
170                                << " bytes of memory for bloom_counter");
171   }
172 
bloom_counter2(size_t m,unsigned long k,const HashPair & fns=HashPair ())173   bloom_counter2(size_t m, unsigned long k, const HashPair& fns = HashPair()) :
174     mem_block_t(super::nb_bytes__(m)),
175     super(m, k, (unsigned char*)mem_block_t::get_ptr(), fns)
176   {
177     if(!mem_block_t::get_ptr())
178       throw std::runtime_error(err::msg() << "Failed to allocate " << super::nb_bytes__(m) << " bytes of memory for bloom_counter");
179   }
180 
bloom_counter2(size_t m,unsigned long k,std::istream & is,const HashPair & fns=HashPair ())181   bloom_counter2(size_t m, unsigned long k, std::istream& is, const HashPair& fns = HashPair()) :
182     mem_block_t(super::nb_bytes__(m)),
183     super(m, k, (unsigned char*)mem_block_t::get_ptr(), fns)
184   {
185     if(!mem_block_t::get_ptr())
186       throw std::runtime_error(err::msg() << "Failed to allocate " << super::nb_bytes__(m) << " bytes of memory for bloom_counter");
187 
188     is.read((char*)mem_block_t::get_ptr(), mem_block_t::get_size());
189   }
190 
191   bloom_counter2(const bloom_counter2& rhs) = delete;
bloom_counter2(bloom_counter2 && rhs)192   bloom_counter2(bloom_counter2&& rhs) :
193     mem_block_t(std::move(rhs)),
194     super(std::move(rhs))
195   { }
196 };
197 
198 template<typename Key, typename HashPair = hash_pair<Key>, typename atomic_t = ::atomic::gcc>
199 class bloom_counter2_file :
200     protected mapped_file,
201     public bloom_counter2_base<Key, HashPair, atomic_t>
202 {
203   typedef bloom_counter2_base<Key, HashPair, atomic_t> super;
204 public:
205   typedef typename super::key_type key_type;
206 
bloom_counter2_file(size_t m,unsigned long k,const char * path,const HashPair & fns=HashPair (),off_t offset=0)207   bloom_counter2_file(size_t m, unsigned long k, const char* path, const HashPair& fns = HashPair(), off_t offset = 0) :
208     mapped_file(path),
209     super(m, k, (unsigned char*)mapped_file::base() + offset, fns)
210   { }
211 
212   bloom_counter2_file(const bloom_counter2_file& rhs) = delete;
bloom_counter2_file(bloom_counter2_file && rhs)213   bloom_counter2_file(bloom_counter2_file&& rhs) :
214     mapped_file(std::move(rhs)),
215     super(std::move(rhs))
216   { }
217 };
218 
219 } // namespace jellyfish {
220 
221 #endif // __BLOOM_COUNTER2_HPP__
222