1 // Copyright 2011 Google Inc. All Rights Reserved.
2 // Author: rays@google.com (Ray Smith)
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 
14 #ifndef TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_
15 #define TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_
16 
17 #include "blobgrid.h" // For BlobGrid
18 
19 struct Pix;
20 
21 namespace tesseract {
22 
23 class DENORM;
24 struct TPOINT;
25 class ColPartition;
26 
27 // Simple class to encapsulate the computation of an image representing
28 // local textline density, and function(s) to make use of it.
29 // The underlying principle is that if you smear connected components
30 // horizontally (vertically for components on a vertically written textline)
31 // and count the number of smeared components in an image, then the resulting
32 // image shows the density of the textlines at each image position.
33 class TESS_API TextlineProjection {
34 public:
35   // The down-scaling factor is computed to obtain a projection resolution
36   // of about 100 dpi, whatever the input.
37   explicit TextlineProjection(int resolution);
38   ~TextlineProjection();
39 
40   // Build the projection profile given the input_block containing lists of
41   // blobs, a rotation to convert to image coords,
42   // and a full-resolution nontext_map, marking out areas to avoid.
43   // During construction, we have the following assumptions:
44   // The rotation is a multiple of 90 degrees, ie no deskew yet.
45   // The blobs have had their left and right rules set to also limit
46   // the range of projection.
47   void ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation, Image nontext_map);
48 
49   // Display the blobs in the window colored according to textline quality.
50   void PlotGradedBlobs(BLOBNBOX_LIST *blobs, ScrollView *win);
51 
52   // Moves blobs that look like they don't sit well on a textline from the
53   // input blobs list to the output small_blobs list.
54   // This gets them away from initial textline finding to stop diacritics
55   // from forming incorrect textlines. (Introduced mainly to fix Thai.)
56   void MoveNonTextlineBlobs(BLOBNBOX_LIST *blobs, BLOBNBOX_LIST *small_blobs) const;
57 
58   // Create a window and display the projection in it.
59   void DisplayProjection() const;
60 
61   // Compute the distance of the box from the partition using curved projection
62   // space. As DistanceOfBoxFromBox, except that the direction is taken from
63   // the ColPartition and the median bounds of the ColPartition are used as
64   // the to_box.
65   int DistanceOfBoxFromPartition(const TBOX &box, const ColPartition &part, const DENORM *denorm,
66                                  bool debug) const;
67 
68   // Compute the distance from the from_box to the to_box using curved
69   // projection space. Separation that involves a decrease in projection
70   // density (moving from the from_box to the to_box) is weighted more heavily
71   // than constant density, and an increase is weighted less.
72   // If horizontal_textline is true, then curved space is used vertically,
73   // as for a diacritic on the edge of a textline.
74   // The projection uses original image coords, so denorm is used to get
75   // back to the image coords from box/part space.
76   int DistanceOfBoxFromBox(const TBOX &from_box, const TBOX &to_box, bool horizontal_textline,
77                            const DENORM *denorm, bool debug) const;
78 
79   // Compute the distance between (x, y1) and (x, y2) using the rule that
80   // a decrease in textline density is weighted more heavily than an increase.
81   // The coordinates are in source image space, ie processed by any denorm
82   // already, but not yet scaled by scale_factor_.
83   // Going from the outside of a textline to the inside should measure much
84   // less distance than going from the inside of a textline to the outside.
85   int VerticalDistance(bool debug, int x, int y1, int y2) const;
86 
87   // Compute the distance between (x1, y) and (x2, y) using the rule that
88   // a decrease in textline density is weighted more heavily than an increase.
89   int HorizontalDistance(bool debug, int x1, int x2, int y) const;
90 
91   // Returns true if the blob appears to be outside of a horizontal textline.
92   // Such blobs are potentially diacritics (even if large in Thai) and should
93   // be kept away from initial textline finding.
94   bool BoxOutOfHTextline(const TBOX &box, const DENORM *denorm, bool debug) const;
95 
96   // Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below,
97   // but uses the median top/bottom for horizontal and median left/right for
98   // vertical instead of the bounding box edges.
99   // Evaluates for both horizontal and vertical and returns the best result,
100   // with a positive value for horizontal and a negative value for vertical.
101   int EvaluateColPartition(const ColPartition &part, const DENORM *denorm, bool debug) const;
102 
103   // Computes the mean projection gradients over the horizontal and vertical
104   // edges of the box:
105   //   -h-h-h-h-h-h
106   //  |------------| mean=htop   -v|+v--------+v|-v
107   //  |+h+h+h+h+h+h|             -v|+v        +v|-v
108   //  |            |             -v|+v        +v|-v
109   //  |    box     |             -v|+v  box   +v|-v
110   //  |            |             -v|+v        +v|-v
111   //  |+h+h+h+h+h+h|             -v|+v        +v|-v
112   //  |------------| mean=hbot   -v|+v--------+v|-v
113   //   -h-h-h-h-h-h
114   //                           mean=vleft  mean=vright
115   //
116   // Returns MAX(htop,hbot) - MAX(vleft,vright), which is a positive number
117   // for a horizontal textline, a negative number for a vertical textline,
118   // and near zero for undecided. Undecided is most likely non-text.
119   int EvaluateBox(const TBOX &box, const DENORM *denorm, bool debug) const;
120 
121 private:
122   // Internal version of EvaluateBox returns the unclipped gradients as well
123   // as the result of EvaluateBox.
124   // hgrad1 and hgrad2 are the gradients for the horizontal textline.
125   int EvaluateBoxInternal(const TBOX &box, const DENORM *denorm, bool debug, int *hgrad1,
126                           int *hgrad2, int *vgrad1, int *vgrad2) const;
127 
128   // Helper returns the mean gradient value for the horizontal row at the given
129   // y, (in the external coordinates) by subtracting the mean of the transformed
130   // row 2 pixels above from the mean of the transformed row 2 pixels below.
131   // This gives a positive value for a good top edge and negative for bottom.
132   // Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
133   int BestMeanGradientInRow(const DENORM *denorm, int16_t min_x, int16_t max_x, int16_t y,
134                             bool best_is_max) const;
135 
136   // Helper returns the mean gradient value for the vertical column at the
137   // given x, (in the external coordinates) by subtracting the mean of the
138   // transformed column 2 pixels left from the mean of the transformed column
139   // 2 pixels to the right.
140   // This gives a positive value for a good left edge and negative for right.
141   // Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
142   int BestMeanGradientInColumn(const DENORM *denorm, int16_t x, int16_t min_y, int16_t max_y,
143                                bool best_is_max) const;
144 
145   // Helper returns the mean pixel value over the line between the start_pt and
146   // end_pt (inclusive), but shifted perpendicular to the line in the projection
147   // image by offset pixels. For simplicity, it is assumed that the vector is
148   // either nearly horizontal or nearly vertical. It works on skewed textlines!
149   // The end points are in external coordinates, and will be denormalized with
150   // the denorm if not nullptr before further conversion to pix coordinates.
151   // After all the conversions, the offset is added to the direction
152   // perpendicular to the line direction. The offset is thus in projection image
153   // coordinates, which allows the caller to get a guaranteed displacement
154   // between pixels used to calculate gradients.
155   int MeanPixelsInLineSegment(const DENORM *denorm, int offset, TPOINT start_pt,
156                               TPOINT end_pt) const;
157 
158   // Helper function to add 1 to a rectangle in source image coords to the
159   // internal projection pix_.
160   void IncrementRectangle8Bit(const TBOX &box);
161   // Inserts a list of blobs into the projection.
162   // Rotation is a multiple of 90 degrees to get from blob coords to
163   // nontext_map coords, image_box is the bounds of the nontext_map.
164   // Blobs are spread horizontally or vertically according to their internal
165   // flags, but the spreading is truncated by set pixels in the nontext_map
166   // and also by the horizontal rule line limits on the blobs.
167   void ProjectBlobs(BLOBNBOX_LIST *blobs, const FCOORD &rotation, const TBOX &image_box,
168                     Image nontext_map);
169   // Pads the bounding box of the given blob according to whether it is on
170   // a horizontal or vertical text line, taking into account tab-stops near
171   // the blob. Returns true if padding was in the horizontal direction.
172   bool PadBlobBox(BLOBNBOX *blob, TBOX *bbox);
173 
174   // Helper denormalizes the TPOINT with the denorm if not nullptr, then
175   // converts to pix_ coordinates.
176   void TransformToPixCoords(const DENORM *denorm, TPOINT *pt) const;
177 
178   // Helper truncates the TPOINT to be within the pix_.
179   void TruncateToImageBounds(TPOINT *pt) const;
180 
181   // Transform tesseract coordinates to coordinates used in the pix.
182   int ImageXToProjectionX(int x) const;
183   int ImageYToProjectionY(int y) const;
184 
185   // The down-sampling scale factor used in building the image.
186   int scale_factor_;
187   // The blob coordinates of the top-left (origin of the pix_) in tesseract
188   // coordinates. Used to transform the bottom-up tesseract coordinates to
189   // the top-down coordinates of the pix.
190   int x_origin_;
191   int y_origin_;
192   // The image of horizontally smeared blob boxes summed to provide a
193   // textline density map. As with a horizontal projection, the map has
194   // dips in the gaps between textlines.
195   Image pix_;
196 };
197 
198 } // namespace tesseract.
199 
200 #endif // TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_
201