1 ///////////////////////////////////////////////////////////////////////
2 // File:        baselinedetect.h
3 // Description: Initial Baseline Determination.
4 // Copyright 2012 Google Inc. All Rights Reserved.
5 // Author:      rays@google.com (Ray Smith)
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 ///////////////////////////////////////////////////////////////////////
18 
19 #ifndef TESSERACT_TEXTORD_BASELINEDETECT_H_
20 #define TESSERACT_TEXTORD_BASELINEDETECT_H_
21 
#include "detlinefit.h"
#include "points.h"
#include "rect.h"

#include <vector>
25 
26 struct Pix;
27 
28 namespace tesseract {
29 
30 class Textord;
31 class BLOBNBOX_LIST;
32 class TO_BLOCK;
33 class TO_BLOCK_LIST;
34 class TO_ROW;
35 
36 // Class to compute and hold baseline data for a TO_ROW.
37 class BaselineRow {
38 public:
39   BaselineRow(double line_size, TO_ROW *to_row);
40 
bounding_box()41   const TBOX &bounding_box() const {
42     return bounding_box_;
43   }
44   // Sets the TO_ROW with the output straight line.
45   void SetupOldLineParameters(TO_ROW *row) const;
46 
47   // Outputs diagnostic information.
48   void Print() const;
49 
50   // Returns the skew angle (in radians) of the current baseline in [-pi,pi].
51   double BaselineAngle() const;
52   // Computes and returns the linespacing at the middle of the overlap
53   // between this and other.
54   double SpaceBetween(const BaselineRow &other) const;
55   // Computes and returns the displacement of the center of the line
56   // perpendicular to the given direction.
57   double PerpDisp(const FCOORD &direction) const;
58   // Computes the y coordinate at the given x using the straight baseline
59   // defined by baseline1_ and baseline2_.
60   double StraightYAtX(double x) const;
61 
62   // Fits a straight baseline to the points. Returns true if it had enough
63   // points to be reasonably sure of the fitted baseline.
64   // If use_box_bottoms is false, baselines positions are formed by
65   // considering the outlines of the blobs.
66   bool FitBaseline(bool use_box_bottoms);
67   // Modifies an existing result of FitBaseline to be parallel to the given
68   // vector if that produces a better result.
69   void AdjustBaselineToParallel(int debug, const FCOORD &direction);
70   // Modifies the baseline to snap to the textline grid if the existing
71   // result is not good enough.
72   double AdjustBaselineToGrid(int debug, const FCOORD &direction, double line_spacing,
73                               double line_offset);
74 
75 private:
76   // Sets up displacement_modes_ with the top few modes of the perpendicular
77   // distance of each blob from the given direction vector, after rounding.
78   void SetupBlobDisplacements(const FCOORD &direction);
79 
80   // Fits a line in the given direction to blobs that are close to the given
81   // target_offset perpendicular displacement from the direction. The fit
82   // error is allowed to be cheat_allowance worse than the existing fit, and
83   // will still be used.
84   // If cheat_allowance > 0, the new fit will be good and replace the current
85   // fit if it has better fit (with cheat) OR its error is below
86   // max_baseline_error_ and the old fit is marked bad.
87   // Otherwise the new fit will only replace the old if it is really better,
88   // or the old fit is marked bad and the new fit has sufficient points, as
89   // well as being within the max_baseline_error_.
90   void FitConstrainedIfBetter(int debug, const FCOORD &direction, double cheat_allowance,
91                               double target_offset);
92   // Returns the perpendicular distance of the point from the straight
93   // baseline.
94   float PerpDistanceFromBaseline(const FCOORD &pt) const;
95   // Computes the bounding box of the row.
96   void ComputeBoundingBox();
97 
98   // The blobs of the row to which this BaselineRow adds extra information
99   // during baseline fitting. Note that blobs_ could easily come from either
100   // a TO_ROW or a ColPartition.
101   BLOBNBOX_LIST *blobs_;
102   // Bounding box of all the blobs.
103   TBOX bounding_box_;
104   // Fitter used to fit lines to the blobs.
105   DetLineFit fitter_;
106   // 2 points on the straight baseline.
107   FCOORD baseline_pt1_;
108   FCOORD baseline_pt2_;
109   // Set of modes of displacements. They indicate preferable baseline positions.
110   std::vector<double> displacement_modes_;
111   // Quantization factor used for displacement_modes_.
112   double disp_quant_factor_;
113   // Half the acceptance range of blob displacements for computing the
114   // error during a constrained fit.
115   double fit_halfrange_;
116   // Max baseline error before a line is regarded as fitting badly.
117   double max_baseline_error_;
118   // The error of fit of the baseline.
119   double baseline_error_;
120   // True if this row seems to have a good baseline.
121   bool good_baseline_;
122 };
123 
124 // Class to compute and hold baseline data for a TO_BLOCK.
125 class BaselineBlock {
126 public:
127   BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block);
128 
~BaselineBlock()129   ~BaselineBlock() {
130     for (auto row : rows_) {
131       delete row;
132     }
133   }
134 
block()135   TO_BLOCK *block() const {
136     return block_;
137   }
skew_angle()138   double skew_angle() const {
139     return skew_angle_;
140   }
141 
142   // Computes and returns the absolute error of the given perp_disp from the
143   // given linespacing model.
144   static double SpacingModelError(double perp_disp, double line_spacing, double line_offset);
145 
146   // Fits straight line baselines and computes the skew angle from the
147   // median angle. Returns true if a good angle is found.
148   // If use_box_bottoms is false, baseline positions are formed by
149   // considering the outlines of the blobs.
150   bool FitBaselinesAndFindSkew(bool use_box_bottoms);
151 
152   // Refits the baseline to a constrained angle, using the stored block
153   // skew if good enough, otherwise the supplied default skew.
154   void ParallelizeBaselines(double default_block_skew);
155 
156   // Sets the parameters in TO_BLOCK that are needed by subsequent processes.
157   void SetupBlockParameters() const;
158 
159   // Processing that is required before fitting baseline splines, but requires
160   // linear baselines in order to be successful:
161   //   Removes noise if required
162   //   Separates out underlines
163   //   Pre-associates blob fragments.
164   // TODO(rays/joeliu) This entire section of code is inherited from the past
165   // and could be improved/eliminated.
166   // page_tr is used to size a debug window.
167   void PrepareForSplineFitting(ICOORD page_tr, bool remove_noise);
168 
169   // Fits splines to the textlines, or creates fake QSPLINES from the straight
170   // baselines that are already on the TO_ROWs.
171   // As a side-effect, computes the xheights of the rows and the block.
172   // Although x-height estimation is conceptually separate, it is part of
173   // detecting perspective distortion and therefore baseline fitting.
174   void FitBaselineSplines(bool enable_splines, bool show_final_rows, Textord *textord);
175 
176   // Draws the (straight) baselines and final blobs colored according to
177   // what was discarded as noise and what is associated with each row.
178   void DrawFinalRows(const ICOORD &page_tr);
179 
180   // Render the generated spline baselines for this block on pix_in.
181   void DrawPixSpline(Image pix_in);
182 
183 private:
184   // Top-level line-spacing calculation. Computes an estimate of the line-
185   // spacing, using the current baselines in the TO_ROWS of the block, and
186   // then refines it by fitting a regression line to the baseline positions
187   // as a function of their integer index.
188   // Returns true if it seems that the model is a reasonable fit to the
189   // observations.
190   bool ComputeLineSpacing();
191 
192   // Computes the deskewed vertical position of each baseline in the block and
193   // stores them in the given vector.
194   void ComputeBaselinePositions(const FCOORD &direction, std::vector<double> *positions);
195 
196   // Computes an estimate of the line spacing of the block from the median
197   // of the spacings between adjacent overlapping textlines.
198   void EstimateLineSpacing();
199 
200   // Refines the line spacing of the block by fitting a regression
201   // line to the deskewed y-position of each baseline as a function of its
202   // estimated line index, allowing for a small error in the initial linespacing
203   // and choosing the best available model.
204   void RefineLineSpacing(const std::vector<double> &positions);
205 
206   // Given an initial estimate of line spacing (m_in) and the positions of each
207   // baseline, computes the line spacing of the block more accurately in m_out,
208   // and the corresponding intercept in c_out, and the number of spacings seen
209   // in index_delta. Returns the error of fit to the line spacing model.
210   double FitLineSpacingModel(const std::vector<double> &positions, double m_in, double *m_out,
211                              double *c_out, int *index_delta);
212 
213   // The block to which this class adds extra information used during baseline
214   // calculation.
215   TO_BLOCK *block_;
216   // The rows in the block that we will be working with.
217   std::vector<BaselineRow *> rows_;
218   // Amount of debugging output to provide.
219   int debug_level_;
220   // True if the block is non-text (graphic).
221   bool non_text_block_;
222   // True if the block has at least one good enough baseline to compute the
223   // skew angle and therefore skew_angle_ is valid.
224   bool good_skew_angle_;
225   // Angle of skew in radians using the conventional anticlockwise from x-axis.
226   double skew_angle_;
227   // Current best estimate line spacing in pixels perpendicular to skew_angle_.
228   double line_spacing_;
229   // Offset for baseline positions, in pixels. Each baseline is at
230   // line_spacing_ * n + line_offset_ for integer n, which represents
231   // [textline] line number in a line numbering system that has line 0 on or
232   // at least near the x-axis. Not equal to the actual line number of a line
233   // within a block as most blocks are not near the x-axis.
234   double line_offset_;
235   // The error of the line spacing model.
236   double model_error_;
237 };
238 
239 class BaselineDetect {
240 public:
241   BaselineDetect(int debug_level, const FCOORD &page_skew, TO_BLOCK_LIST *blocks);
242 
~BaselineDetect()243   ~BaselineDetect() {
244     for (auto block : blocks_) {
245       delete block;
246     }
247   }
248 
249   // Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers
250   // block-wise and page-wise data to smooth small blocks/rows, and applies
251   // smoothing based on block/page-level skew and block-level linespacing.
252   void ComputeStraightBaselines(bool use_box_bottoms);
253 
254   // Computes the baseline splines for each TO_ROW in each TO_BLOCK and
255   // other associated side-effects, including pre-associating blobs, computing
256   // x-heights and displaying debug information.
257   // NOTE that ComputeStraightBaselines must have been called first as this
258   // sets up data in the TO_ROWs upon which this function depends.
259   void ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines,
260                                          bool remove_noise, bool show_final_rows, Textord *textord);
261 
262 private:
263   // Average (median) skew of the blocks on the page among those that have
264   // a good angle of their own.
265   FCOORD page_skew_;
266   // Amount of debug output to produce.
267   int debug_level_;
268   // The blocks that we are working with.
269   std::vector<BaselineBlock *> blocks_;
270 };
271 
272 } // namespace tesseract
273 
274 #endif // TESSERACT_TEXTORD_BASELINEDETECT_H_
275