1 // Copyright 2008 Google Inc. All Rights Reserved.
2 // Author: shobhitsaxena@google.com (Shobhit Saxena)
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 // http://www.apache.org/licenses/LICENSE-2.0
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 #ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
14 #define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
15 
16 #include <allheaders.h>
17 #include "ocrblock.h"
18 #include "params.h"
19 
20 struct Pix;
21 struct Box;
22 struct Boxa;
23 
24 namespace tesseract {
25 
// Parameters controlling debug output of the Devanagari splitting code. These
// are `extern` declarations only; the definitions live in another translation
// unit.
// NOTE(review): INT_VAR_H / BOOL_VAR_H are presumably the parameter
// declaration macros from "params.h" -- confirm.
26 extern INT_VAR_H(devanagari_split_debuglevel);
27 
28 extern BOOL_VAR_H(devanagari_split_debugimage);
29 
// Forward declarations. Both types are only used by pointer or reference in
// the declarations of ShiroRekhaSplitter below (DebugPixa * in Split(),
// const TBOX & in GetBoxForTBOX()).
30 class TBOX;
31 class DebugPixa;
32 
33 class PixelHistogram {
34 public:
PixelHistogram()35   PixelHistogram() {
36     hist_ = nullptr;
37     length_ = 0;
38   }
39 
~PixelHistogram()40   ~PixelHistogram() {
41     Clear();
42   }
43 
Clear()44   void Clear() {
45     delete[] hist_;
46     length_ = 0;
47   }
48 
hist()49   int *hist() const {
50     return hist_;
51   }
52 
length()53   int length() const {
54     return length_;
55   }
56 
57   // Methods to construct histograms from images. These clear any existing data.
58   void ConstructVerticalCountHist(Image pix);
59   void ConstructHorizontalCountHist(Image pix);
60 
61   // This method returns the global-maxima for the histogram. The frequency of
62   // the global maxima is returned in count, if specified.
63   int GetHistogramMaximum(int *count) const;
64 
65 private:
66   int *hist_;
67   int length_;
68 };
69 
70 class ShiroRekhaSplitter {
71 public:
72   enum SplitStrategy {
73     NO_SPLIT = 0,  // No splitting is performed for the phase.
74     MINIMAL_SPLIT, // Blobs are split minimally.
75     MAXIMAL_SPLIT  // Blobs are split maximally.
76   };
77 
78   ShiroRekhaSplitter();
79   virtual ~ShiroRekhaSplitter();
80 
81   // Top-level method to perform splitting based on current settings.
82   // Returns true if a split was actually performed.
83   // If split_for_pageseg is true, the pageseg_split_strategy_ is used for
84   // splitting. If false, the ocr_split_strategy_ is used.
85   bool Split(bool split_for_pageseg, DebugPixa *pixa_debug);
86 
87   // Clears the memory held by this object.
88   void Clear();
89 
90   // Refreshes the words in the segmentation block list by using blobs in the
91   // input blob list.
92   // The segmentation block list must be set.
93   void RefreshSegmentationWithNewBlobs(C_BLOB_LIST *new_blobs);
94 
95   // Returns true if the split strategies for pageseg and ocr are different.
HasDifferentSplitStrategies()96   bool HasDifferentSplitStrategies() const {
97     return pageseg_split_strategy_ != ocr_split_strategy_;
98   }
99 
100   // This only keeps a copy of the block list pointer. At split call, the list
101   // object should still be alive. This block list is used as a golden
102   // segmentation when performing splitting.
set_segmentation_block_list(BLOCK_LIST * block_list)103   void set_segmentation_block_list(BLOCK_LIST *block_list) {
104     segmentation_block_list_ = block_list;
105   }
106 
107   static const int kUnspecifiedXheight = -1;
108 
set_global_xheight(int xheight)109   void set_global_xheight(int xheight) {
110     global_xheight_ = xheight;
111   }
112 
set_perform_close(bool perform)113   void set_perform_close(bool perform) {
114     perform_close_ = perform;
115   }
116 
117   // Returns the image obtained from shiro-rekha splitting. The returned object
118   // is owned by this class. Callers may want to clone the returned pix to keep
119   // it alive beyond the life of ShiroRekhaSplitter object.
splitted_image()120   Image splitted_image() {
121     return splitted_image_;
122   }
123 
124   // On setting the input image, a clone of it is owned by this class.
125   void set_orig_pix(Image pix);
126 
127   // Returns the input image provided to the object. This object is owned by
128   // this class. Callers may want to clone the returned pix to work with it.
orig_pix()129   Image orig_pix() {
130     return orig_pix_;
131   }
132 
ocr_split_strategy()133   SplitStrategy ocr_split_strategy() const {
134     return ocr_split_strategy_;
135   }
136 
set_ocr_split_strategy(SplitStrategy strategy)137   void set_ocr_split_strategy(SplitStrategy strategy) {
138     ocr_split_strategy_ = strategy;
139   }
140 
pageseg_split_strategy()141   SplitStrategy pageseg_split_strategy() const {
142     return pageseg_split_strategy_;
143   }
144 
set_pageseg_split_strategy(SplitStrategy strategy)145   void set_pageseg_split_strategy(SplitStrategy strategy) {
146     pageseg_split_strategy_ = strategy;
147   }
148 
segmentation_block_list()149   BLOCK_LIST *segmentation_block_list() {
150     return segmentation_block_list_;
151   }
152 
153   // This method returns the computed mode-height of blobs in the pix.
154   // It also prunes very small blobs from calculation. Could be used to provide
155   // a global xheight estimate for images which have the same point-size text.
156   static int GetModeHeight(Image pix);
157 
158 private:
159   // Method to perform a close operation on the input image. The xheight
160   // estimate decides the size of sel used.
161   static void PerformClose(Image pix, int xheight_estimate);
162 
163   // This method resolves the cc bbox to a particular row and returns the row's
164   // xheight. This uses block_list_ if available, else just returns the
165   // global_xheight_ estimate currently set in the object.
166   int GetXheightForCC(Box *cc_bbox);
167 
168   // Returns a list of regions (boxes) which should be cleared in the original
169   // image so as to perform shiro-rekha splitting. Pix is assumed to carry one
170   // (or less) word only. Xheight measure could be the global estimate, the row
171   // estimate, or unspecified. If unspecified, over splitting may occur, since a
172   // conservative estimate of stroke width along with an associated multiplier
173   // is used in its place. It is advisable to have a specified xheight when
174   // splitting for classification/training.
175   void SplitWordShiroRekha(SplitStrategy split_strategy, Image pix, int xheight, int word_left,
176                            int word_top, Boxa *regions_to_clear);
177 
178   // Returns a new box object for the corresponding TBOX, based on the original
179   // image's coordinate system.
180   Box *GetBoxForTBOX(const TBOX &tbox) const;
181 
182   // This method returns y-extents of the shiro-rekha computed from the input
183   // word image.
184   static void GetShiroRekhaYExtents(Image word_pix, int *shirorekha_top, int *shirorekha_bottom,
185                                     int *shirorekha_ylevel);
186 
187   Image orig_pix_;       // Just a clone of the input image passed.
188   Image splitted_image_; // Image produced after the last splitting round. The
189                         // object is owned by this class.
190   SplitStrategy pageseg_split_strategy_;
191   SplitStrategy ocr_split_strategy_;
192   Image debug_image_;
193   // This block list is used as a golden segmentation when performing splitting.
194   BLOCK_LIST *segmentation_block_list_;
195   int global_xheight_;
196   bool perform_close_; // Whether a morphological close operation should be
197                        // performed before CCs are run through splitting.
198 };
199 
200 } // namespace tesseract.
201 
202 #endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
203