1 // bi-table.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Classes for representing a bijective mapping between an arbitrary entry
20 // of type T and a signed integral ID.
21 
22 #ifndef FST_LIB_BI_TABLE_H__
23 #define FST_LIB_BI_TABLE_H__
24 
25 #include <deque>
26 using std::deque;
27 #include <functional>
28 #include <unordered_map>
29 using std::unordered_map;
30 using std::unordered_multimap;
31 #include <vector>
32 using std::vector;
33 
34 #include <fst/memory.h>
35 #include <unordered_set>
36 using std::unordered_set;
37 using std::unordered_multiset;
38 
39 namespace fst {
40 
41 // BI TABLES - these determine a bijective mapping between an
42 // arbitrary entry of type T and an signed integral ID of type I. The IDs are
43 // allocated starting from 0 in order.
44 //
45 // template <class I, class T>
46 // class BiTable {
47 //  public:
48 //
49 //   // Required constructors.
50 //   BiTable();
51 //
52 //   // Lookup integer ID from entry. If it doesn't exist and 'insert'
53 //   / is true, then add it. Otherwise return -1.
54 //   I FindId(const T &entry, bool insert = true);
55 //   // Lookup entry from integer ID.
56 //   const T &FindEntry(I) const;
57 //   // # of stored entries.
58 //   I Size() const;
59 // };
60 
61 // An implementation using a hash map for the entry to ID mapping.
62 // H is the hash function and E is the equality function.
63 // If passed to the constructor, ownership is given to this class.
64 
65 template <class I, class T, class H, class E = std::equal_to<T> >
66 class HashBiTable {
67  public:
68   // Reserves space for 'table_size' elements.
69   explicit HashBiTable(size_t table_size = 0, H *h = 0, E *e = 0)
hash_func_(h)70       : hash_func_(h),
71         hash_equal_(e),
72         entry2id_(table_size, (h ? *h : H()), (e ? *e : E())) {
73     if (table_size)
74       id2entry_.reserve(table_size);
75   }
76 
HashBiTable(const HashBiTable<I,T,H,E> & table)77   HashBiTable(const HashBiTable<I, T, H, E> &table)
78       : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0),
79         hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0),
80         entry2id_(table.entry2id_.begin(), table.entry2id_.end(),
81                   table.entry2id_.size(),
82                   (hash_func_ ? *hash_func_ : H()),
83                   (hash_equal_ ? *hash_equal_ : E())),
84         id2entry_(table.id2entry_) { }
85 
~HashBiTable()86   ~HashBiTable() {
87     delete hash_func_;
88     delete hash_equal_;
89   }
90 
91   I FindId(const T &entry, bool insert = true) {
92     if (!insert) {
93       typename unordered_map<T, I, H, E>::const_iterator it = entry2id_.find(entry);
94       return it == entry2id_.end() ? -1 : it->second - 1;
95     }
96     I &id_ref = entry2id_[entry];
97     if (id_ref == 0) {  // T not found  store and assign it a new ID
98       id2entry_.push_back(entry);
99       id_ref = id2entry_.size();
100     }
101     return id_ref - 1;  // NB: id_ref = ID + 1
102   }
103 
FindEntry(I s)104   const T &FindEntry(I s) const {
105     return id2entry_[s];
106   }
107 
Size()108   I Size() const { return id2entry_.size(); }
109 
110  private:
111   H *hash_func_;
112   E *hash_equal_;
113   unordered_map<T, I, H, E> entry2id_;
114   vector<T> id2entry_;
115 
116   void operator=(const HashBiTable<I, T, H, E> &table);  // disallow
117 };
118 
119 
120 // Enables alternative hash set representations below.
121 typedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType;
122 
123 // Default hash set is STL hash_set
124 template<class K, class H, class E, HSType>
125 struct  HashSet : public unordered_set<K, H, E, PoolAllocator<K> > {
126   HashSet(size_t n = 0, const H &h = H(), const E &e = E())
127       : unordered_set<K, H, E, PoolAllocator<K> >(n, h, e)  { }
128 
rehashHashSet129   void rehash(size_t n) { }
130 };
131 
132 
133 // An implementation using a hash set for the entry to ID mapping.
134 // The hash set holds 'keys' which are either the ID or kCurrentKey.
135 // These keys can be mapped to entrys either by looking up in the
136 // entry vector or, if kCurrentKey, in current_entry_ member. The hash
137 // and key equality functions map to entries first. H
138 // is the hash function and E is the equality function. If passed to
139 // the constructor, ownership is given to this class.
140 template <class I, class T, class H,
141           class E = std::equal_to<T>, HSType HS = HS_DENSE>
142 class CompactHashBiTable {
143  public:
144   friend class HashFunc;
145   friend class HashEqual;
146 
147   // Reserves space for 'table_size' elements.
148   explicit CompactHashBiTable(size_t table_size = 0, H *h = 0, E *e = 0)
hash_func_(h)149       : hash_func_(h),
150         hash_equal_(e),
151         compact_hash_func_(*this),
152         compact_hash_equal_(*this),
153         keys_(table_size, compact_hash_func_, compact_hash_equal_) {
154     if (table_size)
155       id2entry_.reserve(table_size);
156   }
157 
CompactHashBiTable(const CompactHashBiTable<I,T,H,E,HS> & table)158   CompactHashBiTable(const CompactHashBiTable<I, T, H, E, HS> &table)
159       : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0),
160         hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0),
161         compact_hash_func_(*this),
162         compact_hash_equal_(*this),
163         keys_(table.keys_.size(), compact_hash_func_, compact_hash_equal_),
164         id2entry_(table.id2entry_) {
165     keys_.insert(table.keys_.begin(), table.keys_.end());
166  }
167 
~CompactHashBiTable()168   ~CompactHashBiTable() {
169     delete hash_func_;
170     delete hash_equal_;
171   }
172 
173   I FindId(const T &entry, bool insert = true) {
174     current_entry_ = &entry;
175     typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey);
176     if (it == keys_.end()) {  // T not found
177       if (insert) {           // store and assign it a new ID
178         I key = id2entry_.size();
179         id2entry_.push_back(entry);
180         keys_.insert(key);
181         return key;
182       } else {
183         return -1;
184       }
185     } else {
186       return *it;
187     }
188   }
189 
FindEntry(I s)190   const T &FindEntry(I s) const { return id2entry_[s]; }
191 
Size()192   I Size() const { return id2entry_.size(); }
193 
194   // Clear content. With argument, erases last n IDs.
195   void Clear(ssize_t n = -1) {
196     if (n < 0 || n > id2entry_.size())
197       n = id2entry_.size();
198     while (n-- > 0) {
199       I key = id2entry_.size() - 1;
200       keys_.erase(key);
201       id2entry_.pop_back();
202     }
203     keys_.rehash(0);
204   }
205 
206  private:
207   static const I kCurrentKey;    // -1
208   static const I kEmptyKey;      // -2
209   static const I kDeletedKey;    // -3
210 
211   class HashFunc {
212    public:
HashFunc(const CompactHashBiTable & ht)213     HashFunc(const CompactHashBiTable &ht) : ht_(&ht) {}
214 
operator()215     size_t operator()(I k) const {
216       if (k >= kCurrentKey) {
217         return (*ht_->hash_func_)(ht_->Key2Entry(k));
218       } else {
219         return 0;
220       }
221     }
222 
223    private:
224     const CompactHashBiTable *ht_;
225   };
226 
227   class HashEqual {
228    public:
HashEqual(const CompactHashBiTable & ht)229     HashEqual(const CompactHashBiTable &ht) : ht_(&ht) {}
230 
operator()231     bool operator()(I k1, I k2) const {
232       if (k1 >= kCurrentKey && k2 >= kCurrentKey) {
233         return (*ht_->hash_equal_)(ht_->Key2Entry(k1), ht_->Key2Entry(k2));
234       } else {
235         return k1 == k2;
236       }
237     }
238    private:
239     const CompactHashBiTable *ht_;
240   };
241 
242   typedef HashSet<I, HashFunc, HashEqual, HS> KeyHashSet;
243 
Key2Entry(I k)244   const T &Key2Entry(I k) const {
245     if (k == kCurrentKey)
246       return *current_entry_;
247     else
248       return id2entry_[k];
249   }
250 
251   H *hash_func_;
252   E *hash_equal_;
253   HashFunc compact_hash_func_;
254   HashEqual compact_hash_equal_;
255   KeyHashSet keys_;
256   vector<T> id2entry_;
257   const T *current_entry_;
258 
259   void operator=(const CompactHashBiTable<I, T, H, E, HS> &table);  // disallow
260 };
261 
262 
263 template <class I, class T, class H, class E, HSType HS>
264 const I CompactHashBiTable<I, T, H, E, HS>::kCurrentKey = -1;
265 
266 template <class I, class T, class H, class E, HSType HS>
267 const I CompactHashBiTable<I, T, H, E, HS>::kEmptyKey = -2;
268 
269 template <class I, class T, class H, class E, HSType HS>
270 const I CompactHashBiTable<I, T, H, E, HS>::kDeletedKey = -3;
271 
272 
273 // An implementation using a vector for the entry to ID mapping.
274 // It is passed a function object FP that should fingerprint entries
275 // uniquely to an integer that can used as a vector index. Normally,
276 // VectorBiTable constructs the FP object.  The user can instead
277 // pass in this object; in that case, VectorBiTable takes its
278 // ownership.
279 template <class I, class T, class FP>
280 class VectorBiTable {
281  public:
282   // Reserves space for 'table_size' elements.
283   explicit VectorBiTable(FP *fp = 0, size_t table_size = 0)
284       : fp_(fp ? fp : new FP()) {
285     if (table_size)
286       id2entry_.reserve(table_size);
287   }
288 
VectorBiTable(const VectorBiTable<I,T,FP> & table)289   VectorBiTable(const VectorBiTable<I, T, FP> &table)
290       : fp_(table.fp_ ? new FP(*table.fp_) : 0),
291         fp2id_(table.fp2id_),
292         id2entry_(table.id2entry_) { }
293 
~VectorBiTable()294   ~VectorBiTable() { delete fp_; }
295 
296   I FindId(const T &entry, bool insert = true) {
297     ssize_t fp = (*fp_)(entry);
298     if (fp >= fp2id_.size())
299       fp2id_.resize(fp + 1);
300     I &id_ref = fp2id_[fp];
301     if (id_ref == 0) {  // T not found
302       if (insert) {     // store and assign it a new ID
303         id2entry_.push_back(entry);
304         id_ref = id2entry_.size();
305       } else {
306         return -1;
307       }
308     }
309     return id_ref - 1;  // NB: id_ref = ID + 1
310   }
311 
FindEntry(I s)312   const T &FindEntry(I s) const { return id2entry_[s]; }
313 
Size()314   I Size() const { return id2entry_.size(); }
315 
Fingerprint()316   const FP &Fingerprint() const { return *fp_; }
317 
318  private:
319   FP *fp_;
320   vector<I> fp2id_;
321   vector<T> id2entry_;
322 
323   void operator=(const VectorBiTable<I, T, FP> &table);  // disallow
324 };
325 
326 
327 // An implementation using a vector and a compact hash table. The
328 // selecting functor S returns true for entries to be hashed in the
329 // vector.  The fingerprinting functor FP returns a unique fingerprint
330 // for each entry to be hashed in the vector (these need to be
331 // suitable for indexing in a vector).  The hash functor H is used
332 // when hashing entry into the compact hash table.  If passed to the
333 // constructor, ownership is given to this class.
334 template <class I, class T, class S, class FP, class H, HSType HS = HS_DENSE>
335 class VectorHashBiTable {
336  public:
337   friend class HashFunc;
338   friend class HashEqual;
339 
340   explicit VectorHashBiTable(S *s, FP *fp, H *h,
341                              size_t vector_size = 0,
342                              size_t entry_size = 0)
selector_(s)343       : selector_(s),
344         fp_(fp),
345         h_(h),
346         hash_func_(*this),
347         hash_equal_(*this),
348         keys_(0, hash_func_, hash_equal_) {
349     if (vector_size)
350       fp2id_.reserve(vector_size);
351     if (entry_size)
352       id2entry_.reserve(entry_size);
353  }
354 
VectorHashBiTable(const VectorHashBiTable<I,T,S,FP,H,HS> & table)355   VectorHashBiTable(const VectorHashBiTable<I, T, S, FP, H, HS> &table)
356       : selector_(new S(table.s_)),
357         fp_(table.fp_ ? new FP(*table.fp_) : 0),
358         h_(table.h_ ? new H(*table.h_) : 0),
359         id2entry_(table.id2entry_),
360         fp2id_(table.fp2id_),
361         hash_func_(*this),
362         hash_equal_(*this),
363         keys_(table.keys_.size(), hash_func_, hash_equal_) {
364      keys_.insert(table.keys_.begin(), table.keys_.end());
365   }
366 
~VectorHashBiTable()367   ~VectorHashBiTable() {
368     delete selector_;
369     delete fp_;
370     delete h_;
371   }
372 
373   I FindId(const T &entry, bool insert = true) {
374     if ((*selector_)(entry)) {  // Use the vector if 'selector_(entry) == true'
375       uint64 fp = (*fp_)(entry);
376       if (fp2id_.size() <= fp)
377         fp2id_.resize(fp + 1, 0);
378       if (fp2id_[fp] == 0) {         // T not found
379         if (insert) {                // store and assign it a new ID
380           id2entry_.push_back(entry);
381           fp2id_[fp] = id2entry_.size();
382         } else {
383           return -1;
384         }
385       }
386       return fp2id_[fp] - 1;  // NB: assoc_value = ID + 1
387     } else {  // Use the hash table otherwise.
388       current_entry_ = &entry;
389       typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey);
390       if (it == keys_.end()) {
391         if (insert) {
392           I key = id2entry_.size();
393           id2entry_.push_back(entry);
394           keys_.insert(key);
395           return key;
396         } else {
397           return -1;
398         }
399       } else {
400         return *it;
401       }
402     }
403   }
404 
FindEntry(I s)405   const T &FindEntry(I s) const {
406     return id2entry_[s];
407   }
408 
Size()409   I Size() const { return id2entry_.size(); }
410 
Selector()411   const S &Selector() const { return *selector_; }
412 
Fingerprint()413   const FP &Fingerprint() const { return *fp_; }
414 
Hash()415   const H &Hash() const { return *h_; }
416 
417  private:
418   static const I kCurrentKey;  // -1
419   static const I kEmptyKey;    // -2
420 
421   class HashFunc {
422    public:
HashFunc(const VectorHashBiTable & ht)423     HashFunc(const VectorHashBiTable &ht) : ht_(&ht) {}
424 
operator()425     size_t operator()(I k) const {
426       if (k >= kCurrentKey) {
427         return (*(ht_->h_))(ht_->Key2Entry(k));
428       } else {
429         return 0;
430       }
431     }
432    private:
433     const VectorHashBiTable *ht_;
434   };
435 
436   class HashEqual {
437    public:
HashEqual(const VectorHashBiTable & ht)438     HashEqual(const VectorHashBiTable &ht) : ht_(&ht) {}
439 
operator()440     bool operator()(I k1, I k2) const {
441       if (k1 >= kCurrentKey && k2 >= kCurrentKey) {
442         return ht_->Key2Entry(k1) == ht_->Key2Entry(k2);
443       } else {
444         return k1 == k2;
445       }
446     }
447    private:
448     const VectorHashBiTable *ht_;
449   };
450 
451   typedef HashSet<I, HashFunc, HashEqual, HS> KeyHashSet;
452 
Key2Entry(I k)453   const T &Key2Entry(I k) const {
454     if (k == kCurrentKey)
455       return *current_entry_;
456     else
457       return id2entry_[k];
458   }
459 
460   S *selector_;  // Returns true if entry hashed into vector
461   FP *fp_;       // Fingerprint used when hashing entry into vector
462   H *h_;         // Hash function used when hashing entry into hash_set
463 
464   vector<T> id2entry_;  // Maps state IDs to entry
465   vector<I> fp2id_;     // Maps entry fingerprints to IDs
466 
467   // Compact implementation of the hash table mapping entrys to
468   // state IDs using the hash function 'h_'
469   HashFunc hash_func_;
470   HashEqual hash_equal_;
471   KeyHashSet keys_;
472   const T *current_entry_;
473 
474   // disallow
475   void operator=(const VectorHashBiTable<I, T, S, FP, H, HS> &table);
476 };
477 
478 template <class I, class T, class S, class FP, class H, HSType HS>
479 const I VectorHashBiTable<I, T, S, FP, H, HS>::kCurrentKey = -1;
480 
481 template <class I, class T, class S, class FP, class H, HSType HS>
482 const I VectorHashBiTable<I, T, S, FP, H, HS>::kEmptyKey = -3;
483 
484 
485 // An implementation using a hash map for the entry to ID
486 // mapping. This version permits erasing of arbitrary states.  The
487 // entry T must have == defined and its default constructor must
488 // produce a entry that will never be seen. F is the hash function.
489 template <class I, class T, class F>
490 class ErasableBiTable {
491  public:
ErasableBiTable()492   ErasableBiTable() : first_(0) {}
493 
494   I FindId(const T &entry, bool insert = true) {
495     I &id_ref = entry2id_[entry];
496     if (id_ref == 0) {  // T not found
497       if (insert) {     // store and assign it a new ID
498         id2entry_.push_back(entry);
499         id_ref = id2entry_.size() + first_;
500       } else {
501         return -1;
502       }
503     }
504     return id_ref - 1;  // NB: id_ref = ID + 1
505   }
506 
FindEntry(I s)507   const T &FindEntry(I s) const { return id2entry_[s - first_]; }
508 
Size()509   I Size() const { return id2entry_.size(); }
510 
Erase(I s)511   void Erase(I s) {
512     T &entry = id2entry_[s - first_];
513     typename unordered_map<T, I, F>::iterator it =
514         entry2id_.find(entry);
515     entry2id_.erase(it);
516     id2entry_[s - first_] = empty_entry_;
517     while (!id2entry_.empty() && id2entry_.front() == empty_entry_) {
518       id2entry_.pop_front();
519       ++first_;
520     }
521   }
522 
523  private:
524   unordered_map<T, I, F> entry2id_;
525   deque<T> id2entry_;
526   const T empty_entry_;
527   I first_;        // I of first element in the deque;
528 
529   // disallow
530   void operator=(const ErasableBiTable<I, T, F> &table);  //disallow
531 };
532 
533 }  // namespace fst
534 
535 #endif  // FST_LIB_BI_TABLE_H__
536