1 // bi-table.h 2 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Copyright 2005-2010 Google, Inc. 16 // Author: riley@google.com (Michael Riley) 17 // 18 // \file 19 // Classes for representing a bijective mapping between an arbitrary entry 20 // of type T and a signed integral ID. 21 22 #ifndef FST_LIB_BI_TABLE_H__ 23 #define FST_LIB_BI_TABLE_H__ 24 25 #include <deque> 26 using std::deque; 27 #include <functional> 28 #include <unordered_map> 29 using std::unordered_map; 30 using std::unordered_multimap; 31 #include <vector> 32 using std::vector; 33 34 #include <fst/memory.h> 35 #include <unordered_set> 36 using std::unordered_set; 37 using std::unordered_multiset; 38 39 namespace fst { 40 41 // BI TABLES - these determine a bijective mapping between an 42 // arbitrary entry of type T and an signed integral ID of type I. The IDs are 43 // allocated starting from 0 in order. 44 // 45 // template <class I, class T> 46 // class BiTable { 47 // public: 48 // 49 // // Required constructors. 50 // BiTable(); 51 // 52 // // Lookup integer ID from entry. If it doesn't exist and 'insert' 53 // / is true, then add it. Otherwise return -1. 54 // I FindId(const T &entry, bool insert = true); 55 // // Lookup entry from integer ID. 56 // const T &FindEntry(I) const; 57 // // # of stored entries. 58 // I Size() const; 59 // }; 60 61 // An implementation using a hash map for the entry to ID mapping. 62 // H is the hash function and E is the equality function. 63 // If passed to the constructor, ownership is given to this class. 64 65 template <class I, class T, class H, class E = std::equal_to<T> > 66 class HashBiTable { 67 public: 68 // Reserves space for 'table_size' elements. 69 explicit HashBiTable(size_t table_size = 0, H *h = 0, E *e = 0) hash_func_(h)70 : hash_func_(h), 71 hash_equal_(e), 72 entry2id_(table_size, (h ? *h : H()), (e ? *e : E())) { 73 if (table_size) 74 id2entry_.reserve(table_size); 75 } 76 HashBiTable(const HashBiTable<I,T,H,E> & table)77 HashBiTable(const HashBiTable<I, T, H, E> &table) 78 : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0), 79 hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0), 80 entry2id_(table.entry2id_.begin(), table.entry2id_.end(), 81 table.entry2id_.size(), 82 (hash_func_ ? *hash_func_ : H()), 83 (hash_equal_ ? *hash_equal_ : E())), 84 id2entry_(table.id2entry_) { } 85 ~HashBiTable()86 ~HashBiTable() { 87 delete hash_func_; 88 delete hash_equal_; 89 } 90 91 I FindId(const T &entry, bool insert = true) { 92 if (!insert) { 93 typename unordered_map<T, I, H, E>::const_iterator it = entry2id_.find(entry); 94 return it == entry2id_.end() ? -1 : it->second - 1; 95 } 96 I &id_ref = entry2id_[entry]; 97 if (id_ref == 0) { // T not found store and assign it a new ID 98 id2entry_.push_back(entry); 99 id_ref = id2entry_.size(); 100 } 101 return id_ref - 1; // NB: id_ref = ID + 1 102 } 103 FindEntry(I s)104 const T &FindEntry(I s) const { 105 return id2entry_[s]; 106 } 107 Size()108 I Size() const { return id2entry_.size(); } 109 110 private: 111 H *hash_func_; 112 E *hash_equal_; 113 unordered_map<T, I, H, E> entry2id_; 114 vector<T> id2entry_; 115 116 void operator=(const HashBiTable<I, T, H, E> &table); // disallow 117 }; 118 119 120 // Enables alternative hash set representations below. 121 typedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType; 122 123 // Default hash set is STL hash_set 124 template<class K, class H, class E, HSType> 125 struct HashSet : public unordered_set<K, H, E, PoolAllocator<K> > { 126 HashSet(size_t n = 0, const H &h = H(), const E &e = E()) 127 : unordered_set<K, H, E, PoolAllocator<K> >(n, h, e) { } 128 rehashHashSet129 void rehash(size_t n) { } 130 }; 131 132 133 // An implementation using a hash set for the entry to ID mapping. 134 // The hash set holds 'keys' which are either the ID or kCurrentKey. 135 // These keys can be mapped to entrys either by looking up in the 136 // entry vector or, if kCurrentKey, in current_entry_ member. The hash 137 // and key equality functions map to entries first. H 138 // is the hash function and E is the equality function. If passed to 139 // the constructor, ownership is given to this class. 140 template <class I, class T, class H, 141 class E = std::equal_to<T>, HSType HS = HS_DENSE> 142 class CompactHashBiTable { 143 public: 144 friend class HashFunc; 145 friend class HashEqual; 146 147 // Reserves space for 'table_size' elements. 148 explicit CompactHashBiTable(size_t table_size = 0, H *h = 0, E *e = 0) hash_func_(h)149 : hash_func_(h), 150 hash_equal_(e), 151 compact_hash_func_(*this), 152 compact_hash_equal_(*this), 153 keys_(table_size, compact_hash_func_, compact_hash_equal_) { 154 if (table_size) 155 id2entry_.reserve(table_size); 156 } 157 CompactHashBiTable(const CompactHashBiTable<I,T,H,E,HS> & table)158 CompactHashBiTable(const CompactHashBiTable<I, T, H, E, HS> &table) 159 : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0), 160 hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0), 161 compact_hash_func_(*this), 162 compact_hash_equal_(*this), 163 keys_(table.keys_.size(), compact_hash_func_, compact_hash_equal_), 164 id2entry_(table.id2entry_) { 165 keys_.insert(table.keys_.begin(), table.keys_.end()); 166 } 167 ~CompactHashBiTable()168 ~CompactHashBiTable() { 169 delete hash_func_; 170 delete hash_equal_; 171 } 172 173 I FindId(const T &entry, bool insert = true) { 174 current_entry_ = &entry; 175 typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey); 176 if (it == keys_.end()) { // T not found 177 if (insert) { // store and assign it a new ID 178 I key = id2entry_.size(); 179 id2entry_.push_back(entry); 180 keys_.insert(key); 181 return key; 182 } else { 183 return -1; 184 } 185 } else { 186 return *it; 187 } 188 } 189 FindEntry(I s)190 const T &FindEntry(I s) const { return id2entry_[s]; } 191 Size()192 I Size() const { return id2entry_.size(); } 193 194 // Clear content. With argument, erases last n IDs. 195 void Clear(ssize_t n = -1) { 196 if (n < 0 || n > id2entry_.size()) 197 n = id2entry_.size(); 198 while (n-- > 0) { 199 I key = id2entry_.size() - 1; 200 keys_.erase(key); 201 id2entry_.pop_back(); 202 } 203 keys_.rehash(0); 204 } 205 206 private: 207 static const I kCurrentKey; // -1 208 static const I kEmptyKey; // -2 209 static const I kDeletedKey; // -3 210 211 class HashFunc { 212 public: HashFunc(const CompactHashBiTable & ht)213 HashFunc(const CompactHashBiTable &ht) : ht_(&ht) {} 214 operator()215 size_t operator()(I k) const { 216 if (k >= kCurrentKey) { 217 return (*ht_->hash_func_)(ht_->Key2Entry(k)); 218 } else { 219 return 0; 220 } 221 } 222 223 private: 224 const CompactHashBiTable *ht_; 225 }; 226 227 class HashEqual { 228 public: HashEqual(const CompactHashBiTable & ht)229 HashEqual(const CompactHashBiTable &ht) : ht_(&ht) {} 230 operator()231 bool operator()(I k1, I k2) const { 232 if (k1 >= kCurrentKey && k2 >= kCurrentKey) { 233 return (*ht_->hash_equal_)(ht_->Key2Entry(k1), ht_->Key2Entry(k2)); 234 } else { 235 return k1 == k2; 236 } 237 } 238 private: 239 const CompactHashBiTable *ht_; 240 }; 241 242 typedef HashSet<I, HashFunc, HashEqual, HS> KeyHashSet; 243 Key2Entry(I k)244 const T &Key2Entry(I k) const { 245 if (k == kCurrentKey) 246 return *current_entry_; 247 else 248 return id2entry_[k]; 249 } 250 251 H *hash_func_; 252 E *hash_equal_; 253 HashFunc compact_hash_func_; 254 HashEqual compact_hash_equal_; 255 KeyHashSet keys_; 256 vector<T> id2entry_; 257 const T *current_entry_; 258 259 void operator=(const CompactHashBiTable<I, T, H, E, HS> &table); // disallow 260 }; 261 262 263 template <class I, class T, class H, class E, HSType HS> 264 const I CompactHashBiTable<I, T, H, E, HS>::kCurrentKey = -1; 265 266 template <class I, class T, class H, class E, HSType HS> 267 const I CompactHashBiTable<I, T, H, E, HS>::kEmptyKey = -2; 268 269 template <class I, class T, class H, class E, HSType HS> 270 const I CompactHashBiTable<I, T, H, E, HS>::kDeletedKey = -3; 271 272 273 // An implementation using a vector for the entry to ID mapping. 274 // It is passed a function object FP that should fingerprint entries 275 // uniquely to an integer that can used as a vector index. Normally, 276 // VectorBiTable constructs the FP object. The user can instead 277 // pass in this object; in that case, VectorBiTable takes its 278 // ownership. 279 template <class I, class T, class FP> 280 class VectorBiTable { 281 public: 282 // Reserves space for 'table_size' elements. 283 explicit VectorBiTable(FP *fp = 0, size_t table_size = 0) 284 : fp_(fp ? fp : new FP()) { 285 if (table_size) 286 id2entry_.reserve(table_size); 287 } 288 VectorBiTable(const VectorBiTable<I,T,FP> & table)289 VectorBiTable(const VectorBiTable<I, T, FP> &table) 290 : fp_(table.fp_ ? new FP(*table.fp_) : 0), 291 fp2id_(table.fp2id_), 292 id2entry_(table.id2entry_) { } 293 ~VectorBiTable()294 ~VectorBiTable() { delete fp_; } 295 296 I FindId(const T &entry, bool insert = true) { 297 ssize_t fp = (*fp_)(entry); 298 if (fp >= fp2id_.size()) 299 fp2id_.resize(fp + 1); 300 I &id_ref = fp2id_[fp]; 301 if (id_ref == 0) { // T not found 302 if (insert) { // store and assign it a new ID 303 id2entry_.push_back(entry); 304 id_ref = id2entry_.size(); 305 } else { 306 return -1; 307 } 308 } 309 return id_ref - 1; // NB: id_ref = ID + 1 310 } 311 FindEntry(I s)312 const T &FindEntry(I s) const { return id2entry_[s]; } 313 Size()314 I Size() const { return id2entry_.size(); } 315 Fingerprint()316 const FP &Fingerprint() const { return *fp_; } 317 318 private: 319 FP *fp_; 320 vector<I> fp2id_; 321 vector<T> id2entry_; 322 323 void operator=(const VectorBiTable<I, T, FP> &table); // disallow 324 }; 325 326 327 // An implementation using a vector and a compact hash table. The 328 // selecting functor S returns true for entries to be hashed in the 329 // vector. The fingerprinting functor FP returns a unique fingerprint 330 // for each entry to be hashed in the vector (these need to be 331 // suitable for indexing in a vector). The hash functor H is used 332 // when hashing entry into the compact hash table. If passed to the 333 // constructor, ownership is given to this class. 334 template <class I, class T, class S, class FP, class H, HSType HS = HS_DENSE> 335 class VectorHashBiTable { 336 public: 337 friend class HashFunc; 338 friend class HashEqual; 339 340 explicit VectorHashBiTable(S *s, FP *fp, H *h, 341 size_t vector_size = 0, 342 size_t entry_size = 0) selector_(s)343 : selector_(s), 344 fp_(fp), 345 h_(h), 346 hash_func_(*this), 347 hash_equal_(*this), 348 keys_(0, hash_func_, hash_equal_) { 349 if (vector_size) 350 fp2id_.reserve(vector_size); 351 if (entry_size) 352 id2entry_.reserve(entry_size); 353 } 354 VectorHashBiTable(const VectorHashBiTable<I,T,S,FP,H,HS> & table)355 VectorHashBiTable(const VectorHashBiTable<I, T, S, FP, H, HS> &table) 356 : selector_(new S(table.s_)), 357 fp_(table.fp_ ? new FP(*table.fp_) : 0), 358 h_(table.h_ ? new H(*table.h_) : 0), 359 id2entry_(table.id2entry_), 360 fp2id_(table.fp2id_), 361 hash_func_(*this), 362 hash_equal_(*this), 363 keys_(table.keys_.size(), hash_func_, hash_equal_) { 364 keys_.insert(table.keys_.begin(), table.keys_.end()); 365 } 366 ~VectorHashBiTable()367 ~VectorHashBiTable() { 368 delete selector_; 369 delete fp_; 370 delete h_; 371 } 372 373 I FindId(const T &entry, bool insert = true) { 374 if ((*selector_)(entry)) { // Use the vector if 'selector_(entry) == true' 375 uint64 fp = (*fp_)(entry); 376 if (fp2id_.size() <= fp) 377 fp2id_.resize(fp + 1, 0); 378 if (fp2id_[fp] == 0) { // T not found 379 if (insert) { // store and assign it a new ID 380 id2entry_.push_back(entry); 381 fp2id_[fp] = id2entry_.size(); 382 } else { 383 return -1; 384 } 385 } 386 return fp2id_[fp] - 1; // NB: assoc_value = ID + 1 387 } else { // Use the hash table otherwise. 388 current_entry_ = &entry; 389 typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey); 390 if (it == keys_.end()) { 391 if (insert) { 392 I key = id2entry_.size(); 393 id2entry_.push_back(entry); 394 keys_.insert(key); 395 return key; 396 } else { 397 return -1; 398 } 399 } else { 400 return *it; 401 } 402 } 403 } 404 FindEntry(I s)405 const T &FindEntry(I s) const { 406 return id2entry_[s]; 407 } 408 Size()409 I Size() const { return id2entry_.size(); } 410 Selector()411 const S &Selector() const { return *selector_; } 412 Fingerprint()413 const FP &Fingerprint() const { return *fp_; } 414 Hash()415 const H &Hash() const { return *h_; } 416 417 private: 418 static const I kCurrentKey; // -1 419 static const I kEmptyKey; // -2 420 421 class HashFunc { 422 public: HashFunc(const VectorHashBiTable & ht)423 HashFunc(const VectorHashBiTable &ht) : ht_(&ht) {} 424 operator()425 size_t operator()(I k) const { 426 if (k >= kCurrentKey) { 427 return (*(ht_->h_))(ht_->Key2Entry(k)); 428 } else { 429 return 0; 430 } 431 } 432 private: 433 const VectorHashBiTable *ht_; 434 }; 435 436 class HashEqual { 437 public: HashEqual(const VectorHashBiTable & ht)438 HashEqual(const VectorHashBiTable &ht) : ht_(&ht) {} 439 operator()440 bool operator()(I k1, I k2) const { 441 if (k1 >= kCurrentKey && k2 >= kCurrentKey) { 442 return ht_->Key2Entry(k1) == ht_->Key2Entry(k2); 443 } else { 444 return k1 == k2; 445 } 446 } 447 private: 448 const VectorHashBiTable *ht_; 449 }; 450 451 typedef HashSet<I, HashFunc, HashEqual, HS> KeyHashSet; 452 Key2Entry(I k)453 const T &Key2Entry(I k) const { 454 if (k == kCurrentKey) 455 return *current_entry_; 456 else 457 return id2entry_[k]; 458 } 459 460 S *selector_; // Returns true if entry hashed into vector 461 FP *fp_; // Fingerprint used when hashing entry into vector 462 H *h_; // Hash function used when hashing entry into hash_set 463 464 vector<T> id2entry_; // Maps state IDs to entry 465 vector<I> fp2id_; // Maps entry fingerprints to IDs 466 467 // Compact implementation of the hash table mapping entrys to 468 // state IDs using the hash function 'h_' 469 HashFunc hash_func_; 470 HashEqual hash_equal_; 471 KeyHashSet keys_; 472 const T *current_entry_; 473 474 // disallow 475 void operator=(const VectorHashBiTable<I, T, S, FP, H, HS> &table); 476 }; 477 478 template <class I, class T, class S, class FP, class H, HSType HS> 479 const I VectorHashBiTable<I, T, S, FP, H, HS>::kCurrentKey = -1; 480 481 template <class I, class T, class S, class FP, class H, HSType HS> 482 const I VectorHashBiTable<I, T, S, FP, H, HS>::kEmptyKey = -3; 483 484 485 // An implementation using a hash map for the entry to ID 486 // mapping. This version permits erasing of arbitrary states. The 487 // entry T must have == defined and its default constructor must 488 // produce a entry that will never be seen. F is the hash function. 489 template <class I, class T, class F> 490 class ErasableBiTable { 491 public: ErasableBiTable()492 ErasableBiTable() : first_(0) {} 493 494 I FindId(const T &entry, bool insert = true) { 495 I &id_ref = entry2id_[entry]; 496 if (id_ref == 0) { // T not found 497 if (insert) { // store and assign it a new ID 498 id2entry_.push_back(entry); 499 id_ref = id2entry_.size() + first_; 500 } else { 501 return -1; 502 } 503 } 504 return id_ref - 1; // NB: id_ref = ID + 1 505 } 506 FindEntry(I s)507 const T &FindEntry(I s) const { return id2entry_[s - first_]; } 508 Size()509 I Size() const { return id2entry_.size(); } 510 Erase(I s)511 void Erase(I s) { 512 T &entry = id2entry_[s - first_]; 513 typename unordered_map<T, I, F>::iterator it = 514 entry2id_.find(entry); 515 entry2id_.erase(it); 516 id2entry_[s - first_] = empty_entry_; 517 while (!id2entry_.empty() && id2entry_.front() == empty_entry_) { 518 id2entry_.pop_front(); 519 ++first_; 520 } 521 } 522 523 private: 524 unordered_map<T, I, F> entry2id_; 525 deque<T> id2entry_; 526 const T empty_entry_; 527 I first_; // I of first element in the deque; 528 529 // disallow 530 void operator=(const ErasableBiTable<I, T, F> &table); //disallow 531 }; 532 533 } // namespace fst 534 535 #endif // FST_LIB_BI_TABLE_H__ 536