1 /////////////////////////////////////////////////////////////////////// 2 // File: indexmapbidi.h 3 // Description: Bi-directional mapping between a sparse and compact space. 4 // Author: rays@google.com (Ray Smith) 5 // 6 // (C) Copyright 2010, Google Inc. 7 // Licensed under the Apache License, Version 2.0 (the "License"); 8 // you may not use this file except in compliance with the License. 9 // You may obtain a copy of the License at 10 // http://www.apache.org/licenses/LICENSE-2.0 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 // 17 /////////////////////////////////////////////////////////////////////// 18 19 #ifndef TESSERACT_CCUTIL_INDEXMAPBIDI_H_ 20 #define TESSERACT_CCUTIL_INDEXMAPBIDI_H_ 21 22 #include <tesseract/export.h> // for TESS_API 23 24 #include <cstdint> // for int32_t 25 #include <cstdio> 26 #include <vector> 27 28 namespace tesseract { 29 30 class IndexMapBiDi; 31 32 // Bidirectional one-to-one mapping between a sparse and a compact discrete 33 // space. Many entries in the sparse space are unmapped, but those that are 34 // mapped have a 1-1 mapping to (and from) the compact space, where all 35 // values are used. This is useful for forming subsets of larger collections, 36 // such as subsets of character sets, or subsets of binary feature spaces. 37 // 38 // This base class provides basic functionality with binary search for the 39 // SparseToCompact mapping to save memory. 40 // For a faster inverse mapping, or to allow a many-to-one mapping, use 41 // IndexMapBiDi below. 42 // NOTE: there are currently no methods to setup an IndexMap on its own! 43 // It must be initialized by copying from an IndexMapBiDi or by DeSerialize. 44 class TESS_API IndexMap { 45 public: 46 virtual ~IndexMap(); 47 48 // SparseToCompact takes a sparse index to an index in the compact space. 49 // Uses a binary search to find the result. For faster speed use 50 // IndexMapBiDi, but that takes more memory. 51 virtual int SparseToCompact(int sparse_index) const; 52 53 // CompactToSparse takes a compact index to the corresponding index in the 54 // sparse space. CompactToSparse(int compact_index)55 int CompactToSparse(int compact_index) const { 56 return compact_map_[compact_index]; 57 } 58 // The size of the sparse space. SparseSize()59 virtual int SparseSize() const { 60 return sparse_size_; 61 } 62 // The size of the compact space. CompactSize()63 int CompactSize() const { 64 return compact_map_.size(); 65 } 66 67 // Copy from the input. 68 void CopyFrom(const IndexMap &src); 69 void CopyFrom(const IndexMapBiDi &src); 70 71 // Writes to the given file. Returns false in case of error. 72 bool Serialize(FILE *fp) const; 73 // Reads from the given file. Returns false in case of error. 74 // If swap is true, assumes a big/little-endian swap is needed. 75 bool DeSerialize(bool swap, FILE *fp); 76 77 protected: 78 // The sparse space covers integers in the range [0, sparse_size_-1]. 79 int32_t sparse_size_; 80 // The compact space covers integers in the range [0, compact_map_.size()-1]. 81 // Each element contains the corresponding sparse index. 82 std::vector<int32_t> compact_map_; 83 }; 84 85 // Bidirectional many-to-one mapping between a sparse and a compact discrete 86 // space. As with IndexMap, many entries may be unmapped, but unlike IndexMap, 87 // of those that are, many may be mapped to the same compact index. 88 // If the map is many-to-one, it is not possible to directly obtain all the 89 // sparse indices that map to a single compact index. 90 // This map is time- rather than space-efficient. It stores the entire sparse 91 // space. 92 // IndexMapBiDi may be initialized in one of 3 ways: 93 // 1. Init(size, true); 94 // Setup(); 95 // Sets a complete 1:1 mapping with no unmapped elements. 96 // 2. Init(size, false); 97 // for ... SetMap(index, true); 98 // Setup(); 99 // Specifies precisely which sparse indices are mapped. The mapping is 1:1. 100 // 3. Either of the above, followed by: 101 // for ... Merge(index1, index2); 102 // CompleteMerges(); 103 // Allows a many-to-one mapping by merging compact space indices. 104 class TESS_API IndexMapBiDi : public IndexMap { 105 public: 106 ~IndexMapBiDi() override; 107 108 // Top-level init function in a single call to initialize a map to select 109 // a single contiguous subrange [start, end) of the sparse space to be mapped 110 // 1 to 1 to the compact space, with all other elements of the sparse space 111 // left unmapped. 112 // No need to call Setup after this. 113 void InitAndSetupRange(int sparse_size, int start, int end); 114 115 // Initializes just the sparse_map_ to the given size with either all 116 // forward indices mapped (all_mapped = true) or none (all_mapped = false). 117 // Call Setup immediately after, or make calls to SetMap first to adjust the 118 // mapping and then call Setup before using the map. 119 void Init(int size, bool all_mapped); 120 // Sets a given index in the sparse_map_ to be mapped or not. 121 void SetMap(int sparse_index, bool mapped); 122 // Sets up the sparse_map_ and compact_map_ properly after Init and 123 // some calls to SetMap. Assumes an ordered 1-1 map from set indices 124 // in the sparse space to the compact space. 125 void Setup(); 126 127 // Merges the two compact space indices. May be called many times, but 128 // the merges must be concluded by a call to CompleteMerges. 129 // Returns true if a merge was actually performed. 130 bool Merge(int compact_index1, int compact_index2); 131 // Returns true if the given compact index has been deleted. IsCompactDeleted(int index)132 bool IsCompactDeleted(int index) const { 133 return MasterCompactIndex(index) < 0; 134 } 135 // Completes one or more Merge operations by further compacting the 136 // compact space. 137 void CompleteMerges(); 138 139 // SparseToCompact takes a sparse index to an index in the compact space. SparseToCompact(int sparse_index)140 int SparseToCompact(int sparse_index) const override { 141 return sparse_map_[sparse_index]; 142 } 143 // The size of the sparse space. SparseSize()144 int SparseSize() const override { 145 return sparse_map_.size(); 146 } 147 148 // Copy from the input. 149 void CopyFrom(const IndexMapBiDi &src); 150 151 // Writes to the given file. Returns false in case of error. 152 bool Serialize(FILE *fp) const; 153 // Reads from the given file. Returns false in case of error. 154 // If swap is true, assumes a big/little-endian swap is needed. 155 bool DeSerialize(bool swap, FILE *fp); 156 157 // Bulk calls to SparseToCompact. 158 // Maps the given array of sparse indices to an array of compact indices. 159 // Assumes the input is sorted. The output indices are sorted and uniqued. 160 // Return value is the number of "missed" features, being features that 161 // don't map to the compact feature space. 162 int MapFeatures(const std::vector<int> &sparse, std::vector<int> *compact) const; 163 164 private: 165 // Returns the master compact index for a given compact index. 166 // During a multiple merge operation, several compact indices may be 167 // combined, so we need to be able to find the master of all. MasterCompactIndex(int compact_index)168 int MasterCompactIndex(int compact_index) const { 169 while (compact_index >= 0 && sparse_map_[compact_map_[compact_index]] != compact_index) { 170 compact_index = sparse_map_[compact_map_[compact_index]]; 171 } 172 return compact_index; 173 } 174 175 // Direct look-up of the compact index for each element in sparse space. 176 std::vector<int32_t> sparse_map_; 177 }; 178 179 } // namespace tesseract. 180 181 #endif // TESSERACT_CCUTIL_INDEXMAPBIDI_H_ 182