1 #ifndef KALLISTO_KMERHASHTABLE_H 2 #define KALLISTO_KMERHASHTABLE_H 3 4 #include <utility> 5 #include <string> 6 #include <iterator> 7 8 /*#include <iostream> // debug 9 using namespace std;*/ 10 11 12 template<typename T, typename Hash = KmerHash> 13 struct KmerHashTable { 14 using value_type = std::pair<Kmer, T>; 15 using key_type = Kmer; 16 using mapped_type = T; 17 18 Hash hasher; 19 value_type *table; 20 size_t size_, pop; 21 value_type empty; 22 value_type deleted; 23 24 25 // ---- iterator ---- 26 27 template<bool is_const_iterator = true> 28 class iterator_ : public std::iterator<std::forward_iterator_tag, value_type> { 29 public: 30 31 typedef typename std::conditional<is_const_iterator, const KmerHashTable *, KmerHashTable *>::type DataStructurePointerType; 32 typedef typename std::conditional<is_const_iterator, const value_type&, value_type&>::type ValueReferenceType; 33 typedef typename std::conditional<is_const_iterator, const value_type *, value_type *>::type ValuePointerType; 34 35 36 DataStructurePointerType ht; 37 size_t h; 38 iterator_KmerHashTable39 iterator_() : ht(nullptr), h(0) {} iterator_KmerHashTable40 iterator_(DataStructurePointerType ht_) : ht(ht_), h(ht_->size_) {} iterator_KmerHashTable41 iterator_(DataStructurePointerType ht_, size_t h_) : ht(ht_), h(h_) {} iterator_KmerHashTable42 iterator_(const iterator_<false>& o) : ht(o.ht), h(o.h) {} 43 iterator_& operator=(const iterator_& o) {ht=o.ht; h=o.h; return *this;} 44 45 ValueReferenceType operator*() const {return ht->table[h];} 46 ValuePointerType operator->() const {return &(ht->table[h]);} 47 find_firstKmerHashTable48 void find_first() { 49 h = 0; 50 if (ht->table != nullptr && ht->size_>0) { 51 Kmer& km = ht->table[h].first; 52 if (km == ht->empty.first || km == ht->deleted.first) { 53 operator++(); 54 } 55 } 56 } 57 58 iterator_ operator++(int) { 59 const iterator_ old(*this); 60 ++(*this); 61 return old; 62 } 63 64 iterator_& operator++() { 65 if (h == ht->size_) { 66 return *this; 67 } 68 ++h; 69 for (; h < ht->size_; ++h) { 70 Kmer& km = ht->table[h].first; 71 if (km != ht->empty.first && km != ht->deleted.first) { 72 break; 73 } 74 } 75 return *this; 76 } 77 bool operator==(const iterator_ &o) const {return (ht->table == o.ht->table) && (h == o.h);} 78 bool operator!=(const iterator_ &o) const {return !(this->operator==(o));} 79 friend class iterator_<true>; 80 }; 81 82 typedef iterator_<true> const_iterator; 83 typedef iterator_<false> iterator; 84 85 86 // --- hash table 87 88 hasherKmerHashTable89 KmerHashTable(const Hash& h = Hash() ) : hasher(h), table(nullptr), size_(0), pop(0) { 90 empty.first.set_empty(); 91 deleted.first.set_deleted(); 92 init_table(1024); 93 } 94 hasherKmerHashTable95 KmerHashTable(size_t sz, const Hash& h = Hash() ) : hasher(h), table(nullptr), size_(0), pop(0) { 96 empty.first.set_empty(); 97 deleted.first.set_deleted(); 98 init_table((size_t) (1.2*sz)); 99 } 100 ~KmerHashTableKmerHashTable101 ~KmerHashTable() { 102 clear_table(); 103 } 104 clear_tableKmerHashTable105 void clear_table() { 106 if (table != nullptr) { 107 delete[] table; 108 table = nullptr; 109 } 110 size_ = 0; 111 pop = 0; 112 } 113 sizeKmerHashTable114 size_t size() const { 115 return pop; 116 } 117 clearKmerHashTable118 void clear() { 119 std::fill(table, table+size_, empty); 120 pop = 0; 121 } 122 init_tableKmerHashTable123 void init_table(size_t sz) { 124 clear_table(); 125 size_ = rndup(sz); 126 //cerr << "init table of size " << size_ << endl; 127 table = new value_type[size_]; 128 std::fill(table, table+size_, empty); 129 } 130 findKmerHashTable131 iterator find(const Kmer& key) { 132 size_t h = hasher(key) & (size_-1); 133 134 for (;; h = (h+1!=size_ ? h+1 : 0)) { 135 if (table[h].first == empty.first) { 136 // empty slot, not in table 137 return iterator(this); 138 } else if (table[h].first == key) { 139 // same key, found 140 return iterator(this, h); 141 } // if it is deleted, we still have to continue 142 } 143 } 144 findKmerHashTable145 const_iterator find(const Kmer& key) const { 146 147 size_t h = hasher(key) & (size_-1); 148 149 for (;; h = (h+1!=size_ ? h+1 : 0)) { 150 if (table[h].first == empty.first) { 151 // empty slot, not in table 152 return const_iterator(this); 153 } else if (table[h].first == key) { 154 // same key, found 155 return const_iterator(this, h); 156 } 157 } 158 } 159 160 eraseKmerHashTable161 iterator erase(const_iterator pos) { 162 if (pos == this->end()) { 163 return this->end(); 164 } 165 size_t h = pos.h; 166 table[h] = deleted; 167 --pop; 168 return ++iterator(this, h); // return pointer to next element 169 } 170 eraseKmerHashTable171 size_t erase(const Kmer& km) { 172 const_iterator pos = find(km); 173 size_t oldpop = pop; 174 if (pos != this->end()) { 175 erase(pos); 176 } 177 return oldpop-pop; 178 } 179 insertKmerHashTable180 std::pair<iterator,bool> insert(const value_type& val) { 181 //cerr << "inserting " << val.first.toString() << " = " << val.second << endl; 182 if ((pop + (pop>>4))> size_) { // if more than 80% full 183 //cerr << "-- triggered resize--" << endl; 184 reserve(2*size_, false); 185 } 186 187 size_t h = hasher(val.first) & (size_-1); 188 //cerr << " hash value = " << h << endl; 189 for (;; h = (h+1!=size_ ? h+1 : 0)) { 190 //cerr << " lookup at " << h << endl; 191 if (table[h].first == empty.first || table[h].first == deleted.first) { 192 //cerr << " found empty slot" << endl; 193 // empty slot, insert here 194 table[h] = val; 195 ++pop; // new table 196 return {iterator(this, h), true}; 197 } else if (table[h].first == val.first) { 198 // same key, update value 199 //cerr << " found key already here " << table[h].first.toString() << " = " << table[h].second << endl; 200 return {iterator(this, h), false}; 201 } 202 } 203 204 } 205 206 void reserve(size_t sz, bool expand = true) { 207 if (expand) { 208 sz = sz + (sz>>4); // what we actually need for insert 209 } 210 if (sz <= size_) { 211 return; 212 } 213 214 value_type *old_table = table; 215 size_t old_size_ = size_; 216 217 218 size_ = rndup(sz); 219 pop = 0; 220 221 table = new value_type[size_]; 222 std::fill(table, table+size_, empty); 223 for (size_t i = 0; i < old_size_; i++) { 224 if (old_table[i].first != empty.first && old_table[i].first != deleted.first) { 225 insert(old_table[i]); 226 } 227 } 228 delete[] old_table; 229 old_table = nullptr; 230 231 } 232 rndupKmerHashTable233 size_t rndup(size_t v) { 234 v--; 235 v |= v >> 1; 236 v |= v >> 2; 237 v |= v >> 4; 238 v |= v >> 8; 239 v |= v >> 16; 240 v |= v >> 32; 241 v++; 242 return v; 243 } 244 beginKmerHashTable245 iterator begin() { 246 iterator it(this); 247 it.find_first(); 248 return it; 249 } 250 beginKmerHashTable251 const_iterator begin() const { 252 const_iterator it(this); 253 it.find_first(); 254 return it; 255 } 256 endKmerHashTable257 iterator end() { 258 return iterator(this); 259 } 260 endKmerHashTable261 const_iterator end() const { 262 return const_iterator(this); 263 } 264 265 266 267 268 269 }; 270 271 #endif // KALLISTO_KMERHASHTABLE_H 272