1 // Copyright 2008 Google Inc. All Rights Reserved. 2 // Author: shobhitsaxena@google.com (Shobhit Saxena) 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // Unless required by applicable law or agreed to in writing, software 8 // distributed under the License is distributed on an "AS IS" BASIS, 9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 // See the License for the specific language governing permissions and 11 // limitations under the License. 12 13 #ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_ 14 #define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_ 15 16 #include <allheaders.h> 17 #include "ocrblock.h" 18 #include "params.h" 19 20 struct Pix; 21 struct Box; 22 struct Boxa; 23 24 namespace tesseract { 25 26 extern INT_VAR_H(devanagari_split_debuglevel); 27 28 extern BOOL_VAR_H(devanagari_split_debugimage); 29 30 class TBOX; 31 class DebugPixa; 32 33 class PixelHistogram { 34 public: PixelHistogram()35 PixelHistogram() { 36 hist_ = nullptr; 37 length_ = 0; 38 } 39 ~PixelHistogram()40 ~PixelHistogram() { 41 Clear(); 42 } 43 Clear()44 void Clear() { 45 delete[] hist_; 46 length_ = 0; 47 } 48 hist()49 int *hist() const { 50 return hist_; 51 } 52 length()53 int length() const { 54 return length_; 55 } 56 57 // Methods to construct histograms from images. These clear any existing data. 58 void ConstructVerticalCountHist(Image pix); 59 void ConstructHorizontalCountHist(Image pix); 60 61 // This method returns the global-maxima for the histogram. The frequency of 62 // the global maxima is returned in count, if specified. 63 int GetHistogramMaximum(int *count) const; 64 65 private: 66 int *hist_; 67 int length_; 68 }; 69 70 class ShiroRekhaSplitter { 71 public: 72 enum SplitStrategy { 73 NO_SPLIT = 0, // No splitting is performed for the phase. 74 MINIMAL_SPLIT, // Blobs are split minimally. 75 MAXIMAL_SPLIT // Blobs are split maximally. 76 }; 77 78 ShiroRekhaSplitter(); 79 virtual ~ShiroRekhaSplitter(); 80 81 // Top-level method to perform splitting based on current settings. 82 // Returns true if a split was actually performed. 83 // If split_for_pageseg is true, the pageseg_split_strategy_ is used for 84 // splitting. If false, the ocr_split_strategy_ is used. 85 bool Split(bool split_for_pageseg, DebugPixa *pixa_debug); 86 87 // Clears the memory held by this object. 88 void Clear(); 89 90 // Refreshes the words in the segmentation block list by using blobs in the 91 // input blob list. 92 // The segmentation block list must be set. 93 void RefreshSegmentationWithNewBlobs(C_BLOB_LIST *new_blobs); 94 95 // Returns true if the split strategies for pageseg and ocr are different. HasDifferentSplitStrategies()96 bool HasDifferentSplitStrategies() const { 97 return pageseg_split_strategy_ != ocr_split_strategy_; 98 } 99 100 // This only keeps a copy of the block list pointer. At split call, the list 101 // object should still be alive. This block list is used as a golden 102 // segmentation when performing splitting. set_segmentation_block_list(BLOCK_LIST * block_list)103 void set_segmentation_block_list(BLOCK_LIST *block_list) { 104 segmentation_block_list_ = block_list; 105 } 106 107 static const int kUnspecifiedXheight = -1; 108 set_global_xheight(int xheight)109 void set_global_xheight(int xheight) { 110 global_xheight_ = xheight; 111 } 112 set_perform_close(bool perform)113 void set_perform_close(bool perform) { 114 perform_close_ = perform; 115 } 116 117 // Returns the image obtained from shiro-rekha splitting. The returned object 118 // is owned by this class. Callers may want to clone the returned pix to keep 119 // it alive beyond the life of ShiroRekhaSplitter object. splitted_image()120 Image splitted_image() { 121 return splitted_image_; 122 } 123 124 // On setting the input image, a clone of it is owned by this class. 125 void set_orig_pix(Image pix); 126 127 // Returns the input image provided to the object. This object is owned by 128 // this class. Callers may want to clone the returned pix to work with it. orig_pix()129 Image orig_pix() { 130 return orig_pix_; 131 } 132 ocr_split_strategy()133 SplitStrategy ocr_split_strategy() const { 134 return ocr_split_strategy_; 135 } 136 set_ocr_split_strategy(SplitStrategy strategy)137 void set_ocr_split_strategy(SplitStrategy strategy) { 138 ocr_split_strategy_ = strategy; 139 } 140 pageseg_split_strategy()141 SplitStrategy pageseg_split_strategy() const { 142 return pageseg_split_strategy_; 143 } 144 set_pageseg_split_strategy(SplitStrategy strategy)145 void set_pageseg_split_strategy(SplitStrategy strategy) { 146 pageseg_split_strategy_ = strategy; 147 } 148 segmentation_block_list()149 BLOCK_LIST *segmentation_block_list() { 150 return segmentation_block_list_; 151 } 152 153 // This method returns the computed mode-height of blobs in the pix. 154 // It also prunes very small blobs from calculation. Could be used to provide 155 // a global xheight estimate for images which have the same point-size text. 156 static int GetModeHeight(Image pix); 157 158 private: 159 // Method to perform a close operation on the input image. The xheight 160 // estimate decides the size of sel used. 161 static void PerformClose(Image pix, int xheight_estimate); 162 163 // This method resolves the cc bbox to a particular row and returns the row's 164 // xheight. This uses block_list_ if available, else just returns the 165 // global_xheight_ estimate currently set in the object. 166 int GetXheightForCC(Box *cc_bbox); 167 168 // Returns a list of regions (boxes) which should be cleared in the original 169 // image so as to perform shiro-rekha splitting. Pix is assumed to carry one 170 // (or less) word only. Xheight measure could be the global estimate, the row 171 // estimate, or unspecified. If unspecified, over splitting may occur, since a 172 // conservative estimate of stroke width along with an associated multiplier 173 // is used in its place. It is advisable to have a specified xheight when 174 // splitting for classification/training. 175 void SplitWordShiroRekha(SplitStrategy split_strategy, Image pix, int xheight, int word_left, 176 int word_top, Boxa *regions_to_clear); 177 178 // Returns a new box object for the corresponding TBOX, based on the original 179 // image's coordinate system. 180 Box *GetBoxForTBOX(const TBOX &tbox) const; 181 182 // This method returns y-extents of the shiro-rekha computed from the input 183 // word image. 184 static void GetShiroRekhaYExtents(Image word_pix, int *shirorekha_top, int *shirorekha_bottom, 185 int *shirorekha_ylevel); 186 187 Image orig_pix_; // Just a clone of the input image passed. 188 Image splitted_image_; // Image produced after the last splitting round. The 189 // object is owned by this class. 190 SplitStrategy pageseg_split_strategy_; 191 SplitStrategy ocr_split_strategy_; 192 Image debug_image_; 193 // This block list is used as a golden segmentation when performing splitting. 194 BLOCK_LIST *segmentation_block_list_; 195 int global_xheight_; 196 bool perform_close_; // Whether a morphological close operation should be 197 // performed before CCs are run through splitting. 198 }; 199 200 } // namespace tesseract. 201 202 #endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_ 203