1 ///////////////////////////////////////////////////////////////////////
2 // File:        indexmapbidi.h
3 // Description: Bi-directional mapping between a sparse and compact space.
4 // Author:      rays@google.com (Ray Smith)
5 //
6 // (C) Copyright 2010, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 ///////////////////////////////////////////////////////////////////////
18 
19 #ifndef TESSERACT_CCUTIL_INDEXMAPBIDI_H_
20 #define TESSERACT_CCUTIL_INDEXMAPBIDI_H_
21 
22 #include <tesseract/export.h> // for TESS_API
23 
24 #include <cstdint>  // for int32_t
25 #include <cstdio>
26 #include <vector>
27 
28 namespace tesseract {
29 
30 class IndexMapBiDi;
31 
32 // Bidirectional one-to-one mapping between a sparse and a compact discrete
33 // space. Many entries in the sparse space are unmapped, but those that are
34 // mapped have a 1-1 mapping to (and from) the compact space, where all
35 // values are used. This is useful for forming subsets of larger collections,
36 // such as subsets of character sets, or subsets of binary feature spaces.
37 //
38 // This base class provides basic functionality with binary search for the
39 // SparseToCompact mapping to save memory.
40 // For a faster inverse mapping, or to allow a many-to-one mapping, use
41 // IndexMapBiDi below.
42 // NOTE: there are currently no methods to setup an IndexMap on its own!
43 // It must be initialized by copying from an IndexMapBiDi or by DeSerialize.
44 class TESS_API IndexMap {
45 public:
46   virtual ~IndexMap();
47 
48   // SparseToCompact takes a sparse index to an index in the compact space.
49   // Uses a binary search to find the result. For faster speed use
50   // IndexMapBiDi, but that takes more memory.
51   virtual int SparseToCompact(int sparse_index) const;
52 
53   // CompactToSparse takes a compact index to the corresponding index in the
54   // sparse space.
CompactToSparse(int compact_index)55   int CompactToSparse(int compact_index) const {
56     return compact_map_[compact_index];
57   }
58   // The size of the sparse space.
SparseSize()59   virtual int SparseSize() const {
60     return sparse_size_;
61   }
62   // The size of the compact space.
CompactSize()63   int CompactSize() const {
64     return compact_map_.size();
65   }
66 
67   // Copy from the input.
68   void CopyFrom(const IndexMap &src);
69   void CopyFrom(const IndexMapBiDi &src);
70 
71   // Writes to the given file. Returns false in case of error.
72   bool Serialize(FILE *fp) const;
73   // Reads from the given file. Returns false in case of error.
74   // If swap is true, assumes a big/little-endian swap is needed.
75   bool DeSerialize(bool swap, FILE *fp);
76 
77 protected:
78   // The sparse space covers integers in the range [0, sparse_size_-1].
79   int32_t sparse_size_;
80   // The compact space covers integers in the range [0, compact_map_.size()-1].
81   // Each element contains the corresponding sparse index.
82   std::vector<int32_t> compact_map_;
83 };
84 
85 // Bidirectional many-to-one mapping between a sparse and a compact discrete
86 // space. As with IndexMap, many entries may be unmapped, but unlike IndexMap,
87 // of those that are, many may be mapped to the same compact index.
88 // If the map is many-to-one, it is not possible to directly obtain all the
89 // sparse indices that map to a single compact index.
90 // This map is time- rather than space-efficient. It stores the entire sparse
91 // space.
92 // IndexMapBiDi may be initialized in one of 3 ways:
93 // 1. Init(size, true);
94 //    Setup();
95 //    Sets a complete 1:1 mapping with no unmapped elements.
96 // 2. Init(size, false);
97 //    for ... SetMap(index, true);
98 //    Setup();
99 //    Specifies precisely which sparse indices are mapped. The mapping is 1:1.
100 // 3. Either of the above, followed by:
101 //    for ... Merge(index1, index2);
102 //    CompleteMerges();
103 //    Allows a many-to-one mapping by merging compact space indices.
104 class TESS_API IndexMapBiDi : public IndexMap {
105 public:
106   ~IndexMapBiDi() override;
107 
108   // Top-level init function in a single call to initialize a map to select
109   // a single contiguous subrange [start, end) of the sparse space to be mapped
110   // 1 to 1 to the compact space, with all other elements of the sparse space
111   // left unmapped.
112   // No need to call Setup after this.
113   void InitAndSetupRange(int sparse_size, int start, int end);
114 
115   // Initializes just the sparse_map_ to the given size with either all
116   // forward indices mapped (all_mapped = true) or none (all_mapped = false).
117   // Call Setup immediately after, or make calls to SetMap first to adjust the
118   // mapping and then call Setup before using the map.
119   void Init(int size, bool all_mapped);
120   // Sets a given index in the sparse_map_ to be mapped or not.
121   void SetMap(int sparse_index, bool mapped);
122   // Sets up the sparse_map_ and compact_map_ properly after Init and
123   // some calls to SetMap. Assumes an ordered 1-1 map from set indices
124   // in the sparse space to the compact space.
125   void Setup();
126 
127   // Merges the two compact space indices. May be called many times, but
128   // the merges must be concluded by a call to CompleteMerges.
129   // Returns true if a merge was actually performed.
130   bool Merge(int compact_index1, int compact_index2);
131   // Returns true if the given compact index has been deleted.
IsCompactDeleted(int index)132   bool IsCompactDeleted(int index) const {
133     return MasterCompactIndex(index) < 0;
134   }
135   // Completes one or more Merge operations by further compacting the
136   // compact space.
137   void CompleteMerges();
138 
139   // SparseToCompact takes a sparse index to an index in the compact space.
SparseToCompact(int sparse_index)140   int SparseToCompact(int sparse_index) const override {
141     return sparse_map_[sparse_index];
142   }
143   // The size of the sparse space.
SparseSize()144   int SparseSize() const override {
145     return sparse_map_.size();
146   }
147 
148   // Copy from the input.
149   void CopyFrom(const IndexMapBiDi &src);
150 
151   // Writes to the given file. Returns false in case of error.
152   bool Serialize(FILE *fp) const;
153   // Reads from the given file. Returns false in case of error.
154   // If swap is true, assumes a big/little-endian swap is needed.
155   bool DeSerialize(bool swap, FILE *fp);
156 
157   // Bulk calls to SparseToCompact.
158   // Maps the given array of sparse indices to an array of compact indices.
159   // Assumes the input is sorted. The output indices are sorted and uniqued.
160   // Return value is the number of "missed" features, being features that
161   // don't map to the compact feature space.
162   int MapFeatures(const std::vector<int> &sparse, std::vector<int> *compact) const;
163 
164 private:
165   // Returns the master compact index for a given compact index.
166   // During a multiple merge operation, several compact indices may be
167   // combined, so we need to be able to find the master of all.
MasterCompactIndex(int compact_index)168   int MasterCompactIndex(int compact_index) const {
169     while (compact_index >= 0 && sparse_map_[compact_map_[compact_index]] != compact_index) {
170       compact_index = sparse_map_[compact_map_[compact_index]];
171     }
172     return compact_index;
173   }
174 
175   // Direct look-up of the compact index for each element in sparse space.
176   std::vector<int32_t> sparse_map_;
177 };
178 
179 } // namespace tesseract.
180 
181 #endif // TESSERACT_CCUTIL_INDEXMAPBIDI_H_
182