1 #ifndef KALLISTO_KMERHASHTABLE_H
2 #define KALLISTO_KMERHASHTABLE_H
3 
4 #include <utility>
5 #include <string>
6 #include <iterator>
7 
8 /*#include <iostream> // debug
9 	using namespace std;*/
10 
11 
12 template<typename T, typename Hash = KmerHash>
13 struct KmerHashTable {
14   using value_type = std::pair<Kmer, T>;
15   using key_type = Kmer;
16   using mapped_type = T;
17 
18   Hash hasher;
19   value_type *table;
20   size_t size_, pop;
21   value_type empty;
22   value_type deleted;
23 
24 
25 // ---- iterator ----
26 
27   template<bool is_const_iterator = true>
28   class iterator_ : public std::iterator<std::forward_iterator_tag, value_type> {
29    public:
30 
31     typedef typename std::conditional<is_const_iterator, const KmerHashTable *, KmerHashTable *>::type DataStructurePointerType;
32     typedef typename std::conditional<is_const_iterator, const value_type&, value_type&>::type ValueReferenceType;
33     typedef typename std::conditional<is_const_iterator, const value_type *, value_type *>::type ValuePointerType;
34 
35 
36     DataStructurePointerType ht;
37     size_t h;
38 
iterator_KmerHashTable39     iterator_() : ht(nullptr), h(0) {}
iterator_KmerHashTable40     iterator_(DataStructurePointerType ht_) : ht(ht_), h(ht_->size_) {}
iterator_KmerHashTable41     iterator_(DataStructurePointerType ht_, size_t h_) :  ht(ht_), h(h_) {}
iterator_KmerHashTable42     iterator_(const iterator_<false>& o) : ht(o.ht), h(o.h) {}
43     iterator_& operator=(const iterator_& o) {ht=o.ht; h=o.h; return *this;}
44 
45     ValueReferenceType operator*() const {return ht->table[h];}
46     ValuePointerType operator->() const {return &(ht->table[h]);}
47 
find_firstKmerHashTable48     void find_first() {
49       h = 0;
50       if (ht->table != nullptr && ht->size_>0) {
51         Kmer& km = ht->table[h].first;
52         if (km == ht->empty.first || km == ht->deleted.first) {
53           operator++();
54         }
55       }
56     }
57 
58     iterator_ operator++(int) {
59       const iterator_ old(*this);
60       ++(*this);
61       return old;
62     }
63 
64     iterator_& operator++() {
65       if (h == ht->size_) {
66         return *this;
67       }
68       ++h;
69       for (; h < ht->size_; ++h) {
70         Kmer& km = ht->table[h].first;
71         if (km != ht->empty.first && km != ht->deleted.first) {
72           break;
73         }
74       }
75       return *this;
76     }
77     bool operator==(const iterator_ &o) const {return (ht->table == o.ht->table) && (h == o.h);}
78     bool operator!=(const iterator_ &o) const {return !(this->operator==(o));}
79     friend class iterator_<true>;
80   };
81 
82   typedef iterator_<true> const_iterator;
83   typedef iterator_<false> iterator;
84 
85 
86   // --- hash table
87 
88 
hasherKmerHashTable89   KmerHashTable(const Hash& h = Hash() ) : hasher(h), table(nullptr), size_(0), pop(0) {
90     empty.first.set_empty();
91     deleted.first.set_deleted();
92     init_table(1024);
93   }
94 
hasherKmerHashTable95   KmerHashTable(size_t sz, const Hash& h = Hash() ) : hasher(h), table(nullptr), size_(0), pop(0) {
96     empty.first.set_empty();
97     deleted.first.set_deleted();
98     init_table((size_t) (1.2*sz));
99   }
100 
~KmerHashTableKmerHashTable101   ~KmerHashTable() {
102     clear_table();
103   }
104 
clear_tableKmerHashTable105   void clear_table() {
106     if (table != nullptr) {
107       delete[] table;
108       table = nullptr;
109     }
110     size_ = 0;
111     pop  = 0;
112   }
113 
sizeKmerHashTable114   size_t size() const {
115     return pop;
116   }
117 
clearKmerHashTable118   void clear() {
119     std::fill(table, table+size_, empty);
120     pop = 0;
121   }
122 
init_tableKmerHashTable123   void init_table(size_t sz) {
124     clear_table();
125     size_ = rndup(sz);
126     //cerr << "init table of size " << size_ << endl;
127     table = new value_type[size_];
128     std::fill(table, table+size_, empty);
129   }
130 
findKmerHashTable131   iterator find(const Kmer& key) {
132     size_t h = hasher(key) & (size_-1);
133 
134     for (;; h =  (h+1!=size_ ? h+1 : 0)) {
135       if (table[h].first == empty.first) {
136         // empty slot, not in table
137         return iterator(this);
138       } else if (table[h].first == key) {
139         // same key, found
140         return iterator(this, h);
141       } // if it is deleted, we still have to continue
142     }
143   }
144 
findKmerHashTable145   const_iterator find(const Kmer& key) const {
146 
147     size_t h = hasher(key) & (size_-1);
148 
149     for (;; h =  (h+1!=size_ ? h+1 : 0)) {
150       if (table[h].first == empty.first) {
151         // empty slot, not in table
152         return const_iterator(this);
153       } else if (table[h].first == key) {
154         // same key, found
155         return const_iterator(this, h);
156       }
157     }
158   }
159 
160 
eraseKmerHashTable161   iterator erase(const_iterator pos) {
162     if (pos == this->end()) {
163       return this->end();
164     }
165     size_t h = pos.h;
166     table[h] = deleted;
167     --pop;
168     return ++iterator(this, h); // return pointer to next element
169   }
170 
eraseKmerHashTable171   size_t erase(const Kmer& km) {
172     const_iterator pos = find(km);
173     size_t oldpop = pop;
174     if (pos != this->end()) {
175       erase(pos);
176     }
177     return oldpop-pop;
178   }
179 
insertKmerHashTable180   std::pair<iterator,bool> insert(const value_type& val) {
181     //cerr << "inserting " << val.first.toString() << " = " << val.second << endl;
182     if ((pop + (pop>>4))> size_) { // if more than 80% full
183       //cerr << "-- triggered resize--" << endl;
184       reserve(2*size_, false);
185     }
186 
187     size_t h = hasher(val.first) & (size_-1);
188     //cerr << " hash value = " << h << endl;
189     for (;; h = (h+1!=size_ ? h+1 : 0)) {
190       //cerr << "  lookup at " << h << endl;
191       if (table[h].first == empty.first || table[h].first == deleted.first) {
192         //cerr << "   found empty slot" << endl;
193         // empty slot, insert here
194         table[h] = val;
195         ++pop; // new table
196         return {iterator(this, h), true};
197       } else if (table[h].first == val.first) {
198         // same key, update value
199         //cerr << "   found key already here " << table[h].first.toString() << " = " << table[h].second <<  endl;
200         return {iterator(this, h), false};
201       }
202     }
203 
204   }
205 
206   void reserve(size_t sz, bool expand = true) {
207     if (expand) {
208       sz = sz + (sz>>4); // what we actually need for insert
209     }
210     if (sz <= size_) {
211       return;
212     }
213 
214     value_type *old_table = table;
215     size_t old_size_ = size_;
216 
217 
218     size_ = rndup(sz);
219     pop = 0;
220 
221     table = new value_type[size_];
222     std::fill(table, table+size_, empty);
223     for (size_t i = 0; i < old_size_; i++) {
224       if (old_table[i].first != empty.first && old_table[i].first != deleted.first) {
225         insert(old_table[i]);
226       }
227     }
228     delete[] old_table;
229     old_table = nullptr;
230 
231   }
232 
rndupKmerHashTable233   size_t rndup(size_t v) {
234     v--;
235     v |= v >> 1;
236     v |= v >> 2;
237     v |= v >> 4;
238     v |= v >> 8;
239     v |= v >> 16;
240     v |= v >> 32;
241     v++;
242     return v;
243   }
244 
beginKmerHashTable245   iterator begin() {
246     iterator it(this);
247     it.find_first();
248     return it;
249   }
250 
beginKmerHashTable251   const_iterator begin() const {
252     const_iterator it(this);
253     it.find_first();
254     return it;
255   }
256 
endKmerHashTable257   iterator end() {
258     return iterator(this);
259   }
260 
endKmerHashTable261   const_iterator end() const {
262     return const_iterator(this);
263   }
264 
265 
266 
267 
268 
269 };
270 
271 #endif // KALLISTO_KMERHASHTABLE_H
272