1 /////////////////////////////////////////////////////////////////////// 2 // File: equationdetect.h 3 // Description: The equation detection class that inherits equationdetectbase. 4 // Author: Zongyi (Joe) Liu (joeliu@google.com) 5 // 6 // (C) Copyright 2011, Google Inc. 7 // Licensed under the Apache License, Version 2.0 (the "License"); 8 // you may not use this file except in compliance with the License. 9 // You may obtain a copy of the License at 10 // http://www.apache.org/licenses/LICENSE-2.0 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 // 17 /////////////////////////////////////////////////////////////////////// 18 19 #ifndef TESSERACT_CCMAIN_EQUATIONDETECT_H_ 20 #define TESSERACT_CCMAIN_EQUATIONDETECT_H_ 21 22 #include <tesseract/unichar.h> // for UNICHAR_ID 23 #include "blobbox.h" // for BLOBNBOX (ptr only), BlobSpecialText... 24 #include "equationdetectbase.h" // for EquationDetectBase 25 #include "tesseractclass.h" // for Tesseract 26 27 class TBOX; 28 class UNICHARSET; 29 30 namespace tesseract { 31 32 class Tesseract; 33 class ColPartition; 34 class ColPartitionGrid; 35 class ColPartitionSet; 36 37 class TESS_API EquationDetect : public EquationDetectBase { 38 public: 39 EquationDetect(const char *equ_datapath, const char *equ_language); 40 ~EquationDetect() override; 41 42 enum IndentType { NO_INDENT, LEFT_INDENT, RIGHT_INDENT, BOTH_INDENT, INDENT_TYPE_COUNT }; 43 44 // Reset the lang_tesseract_ pointer. This function should be called before we 45 // do any detector work. 46 void SetLangTesseract(Tesseract *lang_tesseract); 47 48 // Iterate over the blobs inside to_block, and set the blobs that we want to 49 // process to BSTT_NONE. (By default, they should be BSTT_SKIP). The function 50 // returns 0 upon success. 51 int LabelSpecialText(TO_BLOCK *to_block) override; 52 53 // Find possible equation partitions from part_grid. Should be called 54 // after the special_text_type of blobs are set. 55 // It returns 0 upon success. 56 int FindEquationParts(ColPartitionGrid *part_grid, ColPartitionSet **best_columns) override; 57 58 // Reset the resolution of the processing image. TEST only function. 59 void SetResolution(const int resolution); 60 61 protected: 62 // Identify the special text type for one blob, and update its field. When 63 // height_th is set (> 0), we will label the blob as BSTT_NONE if its height 64 // is less than height_th. 65 void IdentifySpecialText(BLOBNBOX *blob, const int height_th); 66 67 // Estimate the type for one unichar. 68 BlobSpecialTextType EstimateTypeForUnichar(const UNICHARSET &unicharset, 69 const UNICHAR_ID id) const; 70 71 // Compute special text type for each blobs in part_grid_. 72 void IdentifySpecialText(); 73 74 // Identify blobs that we want to skip during special blob type 75 // classification. 76 void IdentifyBlobsToSkip(ColPartition *part); 77 78 // The ColPartitions in part_grid_ maybe over-segmented, particularly in the 79 // block equation regions. So we like to identify these partitions and merge 80 // them before we do the searching. 81 void MergePartsByLocation(); 82 83 // Staring from the seed center, we do radius search. And for partitions that 84 // have large overlaps with seed, we remove them from part_grid_ and add into 85 // parts_overlap. Note: this function may update the part_grid_, so if the 86 // caller is also running ColPartitionGridSearch, use the RepositionIterator 87 // to continue. 88 void SearchByOverlap(ColPartition *seed, std::vector<ColPartition *> *parts_overlap); 89 90 // Insert part back into part_grid_, after it absorbs some other parts. 91 void InsertPartAfterAbsorb(ColPartition *part); 92 93 // Identify the colparitions in part_grid_, label them as PT_EQUATION, and 94 // save them into cp_seeds_. 95 void IdentifySeedParts(); 96 97 // Check the blobs count for a seed region candidate. 98 bool CheckSeedBlobsCount(ColPartition *part); 99 100 // Compute the foreground pixel density for a tbox area. 101 float ComputeForegroundDensity(const TBOX &tbox); 102 103 // Check if part from seed2 label: with low math density and left indented. We 104 // are using two checks: 105 // 1. If its left is aligned with any coordinates in indented_texts_left, 106 // which we assume have been sorted. 107 // 2. If its foreground density is over foreground_density_th. 108 bool CheckForSeed2(const std::vector<int> &indented_texts_left, 109 const float foreground_density_th, ColPartition *part); 110 111 // Count the number of values in sorted_vec that is close to val, used to 112 // check if a partition is aligned with text partitions. 113 int CountAlignment(const std::vector<int> &sorted_vec, const int val) const; 114 115 // Check for a seed candidate using the foreground pixel density. And we 116 // return true if the density is below a certain threshold, because characters 117 // in equation regions usually are apart with more white spaces. 118 bool CheckSeedFgDensity(const float density_th, ColPartition *part); 119 120 // A light version of SplitCPHor: instead of really doing the part split, we 121 // simply compute the union bounding box of each split part. 122 void SplitCPHorLite(ColPartition *part, std::vector<TBOX> *splitted_boxes); 123 124 // Split the part (horizontally), and save the split result into 125 // parts_splitted. Note that it is caller's responsibility to release the 126 // memory owns by parts_splitted. On the other hand, the part is unchanged 127 // during this process and still owns the blobs, so do NOT call DeleteBoxes 128 // when freeing the colpartitions in parts_splitted. 129 void SplitCPHor(ColPartition *part, std::vector<ColPartition *> *parts_splitted); 130 131 // Check the density for a seed candidate (part) using its math density and 132 // italic density, returns true if the check passed. 133 bool CheckSeedDensity(const float math_density_high, const float math_density_low, 134 const ColPartition *part) const; 135 136 // Check if part is indented. 137 IndentType IsIndented(ColPartition *part); 138 139 // Identify inline partitions from cp_seeds_, and re-label them. 140 void IdentifyInlineParts(); 141 142 // Compute the super bounding box for all colpartitions inside part_grid_. 143 void ComputeCPsSuperBBox(); 144 145 // Identify inline partitions from cp_seeds_ using the horizontal search. 146 void IdentifyInlinePartsHorizontal(); 147 148 // Estimate the line spacing between two text partitions. Returns -1 if not 149 // enough data. 150 int EstimateTextPartLineSpacing(); 151 152 // Identify inline partitions from cp_seeds_ using vertical search. 153 void IdentifyInlinePartsVertical(const bool top_to_bottom, const int textPartsLineSpacing); 154 155 // Check if part is an inline equation zone. This should be called after we 156 // identified the seed regions. 157 bool IsInline(const bool search_bottom, const int textPartsLineSpacing, ColPartition *part); 158 159 // For a given seed partition, we search the part_grid_ and see if there is 160 // any partition can be merged with it. It returns true if the seed has been 161 // expanded. 162 bool ExpandSeed(ColPartition *seed); 163 164 // Starting from the seed position, we search the part_grid_ 165 // horizontally/vertically, find all partitions that can be 166 // merged with seed, remove them from part_grid_, and put them into 167 // parts_to_merge. 168 void ExpandSeedHorizontal(const bool search_left, ColPartition *seed, 169 std::vector<ColPartition *> *parts_to_merge); 170 void ExpandSeedVertical(const bool search_bottom, ColPartition *seed, 171 std::vector<ColPartition *> *parts_to_merge); 172 173 // Check if a part_box is the small neighbor of seed_box. 174 bool IsNearSmallNeighbor(const TBOX &seed_box, const TBOX &part_box) const; 175 176 // Perform the density check for part, which we assume is nearing a seed 177 // partition. It returns true if the check passed. 178 bool CheckSeedNeighborDensity(const ColPartition *part) const; 179 180 // After identify the math blocks, we do one more scanning on all text 181 // partitions, and check if any of them is the satellite of: 182 // math blocks: here a p is the satellite of q if: 183 // 1. q is the nearest vertical neighbor of p, and 184 // 2. y_gap(p, q) is less than a threshold, and 185 // 3. x_overlap(p, q) is over a threshold. 186 // Note that p can be the satellites of two blocks: its top neighbor and 187 // bottom neighbor. 188 void ProcessMathBlockSatelliteParts(); 189 190 // Check if part is the satellite of one/two math blocks. If it is, we return 191 // true, and save the blocks into math_blocks. 192 bool IsMathBlockSatellite(ColPartition *part, std::vector<ColPartition *> *math_blocks); 193 194 // Search the nearest neighbor of part in one vertical direction as defined in 195 // search_bottom. It returns the neighbor found that major x overlap with it, 196 // or nullptr when not found. 197 ColPartition *SearchNNVertical(const bool search_bottom, const ColPartition *part); 198 199 // Check if the neighbor with vertical distance of y_gap is a near and math 200 // block partition. 201 bool IsNearMathNeighbor(const int y_gap, const ColPartition *neighbor) const; 202 203 // Generate the tiff file name for output/debug file. 204 void GetOutputTiffName(const char *name, std::string &image_name) const; 205 206 // Debugger function that renders ColPartitions on the input image, where: 207 // parts labeled as PT_EQUATION will be painted in red, PT_INLINE_EQUATION 208 // will be painted in green, and other parts will be painted in blue. 209 void PaintColParts(const std::string &outfile) const; 210 211 // Debugger function that renders the blobs in part_grid_ over the input 212 // image. 213 void PaintSpecialTexts(const std::string &outfile) const; 214 215 // Debugger function that print the math blobs density values for a 216 // ColPartition object. 217 void PrintSpecialBlobsDensity(const ColPartition *part) const; 218 219 // The tesseract engine initialized from equation training data. 220 Tesseract equ_tesseract_; 221 222 // The tesseract engine used for OCR. This pointer is passed in by the caller, 223 // so do NOT destroy it in this class. 224 Tesseract *lang_tesseract_; 225 226 // The ColPartitionGrid that we are processing. This pointer is passed in from 227 // the caller, so do NOT destroy it in the class. 228 ColPartitionGrid *part_grid_ = nullptr; 229 230 // A simple array of pointers to the best assigned column division at 231 // each grid y coordinate. This pointer is passed in from the caller, so do 232 // NOT destroy it in the class. 233 ColPartitionSet **best_columns_ = nullptr; 234 235 // The super bounding box of all cps in the part_grid_. 236 TBOX *cps_super_bbox_; 237 238 // The seed ColPartition for equation region. 239 std::vector<ColPartition *> cp_seeds_; 240 241 // The resolution (dpi) of the processing image. 242 int resolution_; 243 244 // The number of pages we have processed. 245 int page_count_; 246 }; 247 248 } // namespace tesseract 249 250 #endif // TESSERACT_CCMAIN_EQUATIONDETECT_H_ 251