1 /////////////////////////////////////////////////////////////////////// 2 // File: baselinedetect.h 3 // Description: Initial Baseline Determination. 4 // Copyright 2012 Google Inc. All Rights Reserved. 5 // Author: rays@google.com (Ray Smith) 6 // 7 // Licensed under the Apache License, Version 2.0 (the "License"); 8 // you may not use this file except in compliance with the License. 9 // You may obtain a copy of the License at 10 // http://www.apache.org/licenses/LICENSE-2.0 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 // 17 /////////////////////////////////////////////////////////////////////// 18 19 #ifndef TESSERACT_TEXTORD_BASELINEDETECT_H_ 20 #define TESSERACT_TEXTORD_BASELINEDETECT_H_ 21 22 #include "detlinefit.h" 23 #include "points.h" 24 #include "rect.h" 25 26 struct Pix; 27 28 namespace tesseract { 29 30 class Textord; 31 class BLOBNBOX_LIST; 32 class TO_BLOCK; 33 class TO_BLOCK_LIST; 34 class TO_ROW; 35 36 // Class to compute and hold baseline data for a TO_ROW. 37 class BaselineRow { 38 public: 39 BaselineRow(double line_size, TO_ROW *to_row); 40 bounding_box()41 const TBOX &bounding_box() const { 42 return bounding_box_; 43 } 44 // Sets the TO_ROW with the output straight line. 45 void SetupOldLineParameters(TO_ROW *row) const; 46 47 // Outputs diagnostic information. 48 void Print() const; 49 50 // Returns the skew angle (in radians) of the current baseline in [-pi,pi]. 51 double BaselineAngle() const; 52 // Computes and returns the linespacing at the middle of the overlap 53 // between this and other. 54 double SpaceBetween(const BaselineRow &other) const; 55 // Computes and returns the displacement of the center of the line 56 // perpendicular to the given direction. 57 double PerpDisp(const FCOORD &direction) const; 58 // Computes the y coordinate at the given x using the straight baseline 59 // defined by baseline1_ and baseline2_. 60 double StraightYAtX(double x) const; 61 62 // Fits a straight baseline to the points. Returns true if it had enough 63 // points to be reasonably sure of the fitted baseline. 64 // If use_box_bottoms is false, baselines positions are formed by 65 // considering the outlines of the blobs. 66 bool FitBaseline(bool use_box_bottoms); 67 // Modifies an existing result of FitBaseline to be parallel to the given 68 // vector if that produces a better result. 69 void AdjustBaselineToParallel(int debug, const FCOORD &direction); 70 // Modifies the baseline to snap to the textline grid if the existing 71 // result is not good enough. 72 double AdjustBaselineToGrid(int debug, const FCOORD &direction, double line_spacing, 73 double line_offset); 74 75 private: 76 // Sets up displacement_modes_ with the top few modes of the perpendicular 77 // distance of each blob from the given direction vector, after rounding. 78 void SetupBlobDisplacements(const FCOORD &direction); 79 80 // Fits a line in the given direction to blobs that are close to the given 81 // target_offset perpendicular displacement from the direction. The fit 82 // error is allowed to be cheat_allowance worse than the existing fit, and 83 // will still be used. 84 // If cheat_allowance > 0, the new fit will be good and replace the current 85 // fit if it has better fit (with cheat) OR its error is below 86 // max_baseline_error_ and the old fit is marked bad. 87 // Otherwise the new fit will only replace the old if it is really better, 88 // or the old fit is marked bad and the new fit has sufficient points, as 89 // well as being within the max_baseline_error_. 90 void FitConstrainedIfBetter(int debug, const FCOORD &direction, double cheat_allowance, 91 double target_offset); 92 // Returns the perpendicular distance of the point from the straight 93 // baseline. 94 float PerpDistanceFromBaseline(const FCOORD &pt) const; 95 // Computes the bounding box of the row. 96 void ComputeBoundingBox(); 97 98 // The blobs of the row to which this BaselineRow adds extra information 99 // during baseline fitting. Note that blobs_ could easily come from either 100 // a TO_ROW or a ColPartition. 101 BLOBNBOX_LIST *blobs_; 102 // Bounding box of all the blobs. 103 TBOX bounding_box_; 104 // Fitter used to fit lines to the blobs. 105 DetLineFit fitter_; 106 // 2 points on the straight baseline. 107 FCOORD baseline_pt1_; 108 FCOORD baseline_pt2_; 109 // Set of modes of displacements. They indicate preferable baseline positions. 110 std::vector<double> displacement_modes_; 111 // Quantization factor used for displacement_modes_. 112 double disp_quant_factor_; 113 // Half the acceptance range of blob displacements for computing the 114 // error during a constrained fit. 115 double fit_halfrange_; 116 // Max baseline error before a line is regarded as fitting badly. 117 double max_baseline_error_; 118 // The error of fit of the baseline. 119 double baseline_error_; 120 // True if this row seems to have a good baseline. 121 bool good_baseline_; 122 }; 123 124 // Class to compute and hold baseline data for a TO_BLOCK. 125 class BaselineBlock { 126 public: 127 BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block); 128 ~BaselineBlock()129 ~BaselineBlock() { 130 for (auto row : rows_) { 131 delete row; 132 } 133 } 134 block()135 TO_BLOCK *block() const { 136 return block_; 137 } skew_angle()138 double skew_angle() const { 139 return skew_angle_; 140 } 141 142 // Computes and returns the absolute error of the given perp_disp from the 143 // given linespacing model. 144 static double SpacingModelError(double perp_disp, double line_spacing, double line_offset); 145 146 // Fits straight line baselines and computes the skew angle from the 147 // median angle. Returns true if a good angle is found. 148 // If use_box_bottoms is false, baseline positions are formed by 149 // considering the outlines of the blobs. 150 bool FitBaselinesAndFindSkew(bool use_box_bottoms); 151 152 // Refits the baseline to a constrained angle, using the stored block 153 // skew if good enough, otherwise the supplied default skew. 154 void ParallelizeBaselines(double default_block_skew); 155 156 // Sets the parameters in TO_BLOCK that are needed by subsequent processes. 157 void SetupBlockParameters() const; 158 159 // Processing that is required before fitting baseline splines, but requires 160 // linear baselines in order to be successful: 161 // Removes noise if required 162 // Separates out underlines 163 // Pre-associates blob fragments. 164 // TODO(rays/joeliu) This entire section of code is inherited from the past 165 // and could be improved/eliminated. 166 // page_tr is used to size a debug window. 167 void PrepareForSplineFitting(ICOORD page_tr, bool remove_noise); 168 169 // Fits splines to the textlines, or creates fake QSPLINES from the straight 170 // baselines that are already on the TO_ROWs. 171 // As a side-effect, computes the xheights of the rows and the block. 172 // Although x-height estimation is conceptually separate, it is part of 173 // detecting perspective distortion and therefore baseline fitting. 174 void FitBaselineSplines(bool enable_splines, bool show_final_rows, Textord *textord); 175 176 // Draws the (straight) baselines and final blobs colored according to 177 // what was discarded as noise and what is associated with each row. 178 void DrawFinalRows(const ICOORD &page_tr); 179 180 // Render the generated spline baselines for this block on pix_in. 181 void DrawPixSpline(Image pix_in); 182 183 private: 184 // Top-level line-spacing calculation. Computes an estimate of the line- 185 // spacing, using the current baselines in the TO_ROWS of the block, and 186 // then refines it by fitting a regression line to the baseline positions 187 // as a function of their integer index. 188 // Returns true if it seems that the model is a reasonable fit to the 189 // observations. 190 bool ComputeLineSpacing(); 191 192 // Computes the deskewed vertical position of each baseline in the block and 193 // stores them in the given vector. 194 void ComputeBaselinePositions(const FCOORD &direction, std::vector<double> *positions); 195 196 // Computes an estimate of the line spacing of the block from the median 197 // of the spacings between adjacent overlapping textlines. 198 void EstimateLineSpacing(); 199 200 // Refines the line spacing of the block by fitting a regression 201 // line to the deskewed y-position of each baseline as a function of its 202 // estimated line index, allowing for a small error in the initial linespacing 203 // and choosing the best available model. 204 void RefineLineSpacing(const std::vector<double> &positions); 205 206 // Given an initial estimate of line spacing (m_in) and the positions of each 207 // baseline, computes the line spacing of the block more accurately in m_out, 208 // and the corresponding intercept in c_out, and the number of spacings seen 209 // in index_delta. Returns the error of fit to the line spacing model. 210 double FitLineSpacingModel(const std::vector<double> &positions, double m_in, double *m_out, 211 double *c_out, int *index_delta); 212 213 // The block to which this class adds extra information used during baseline 214 // calculation. 215 TO_BLOCK *block_; 216 // The rows in the block that we will be working with. 217 std::vector<BaselineRow *> rows_; 218 // Amount of debugging output to provide. 219 int debug_level_; 220 // True if the block is non-text (graphic). 221 bool non_text_block_; 222 // True if the block has at least one good enough baseline to compute the 223 // skew angle and therefore skew_angle_ is valid. 224 bool good_skew_angle_; 225 // Angle of skew in radians using the conventional anticlockwise from x-axis. 226 double skew_angle_; 227 // Current best estimate line spacing in pixels perpendicular to skew_angle_. 228 double line_spacing_; 229 // Offset for baseline positions, in pixels. Each baseline is at 230 // line_spacing_ * n + line_offset_ for integer n, which represents 231 // [textline] line number in a line numbering system that has line 0 on or 232 // at least near the x-axis. Not equal to the actual line number of a line 233 // within a block as most blocks are not near the x-axis. 234 double line_offset_; 235 // The error of the line spacing model. 236 double model_error_; 237 }; 238 239 class BaselineDetect { 240 public: 241 BaselineDetect(int debug_level, const FCOORD &page_skew, TO_BLOCK_LIST *blocks); 242 ~BaselineDetect()243 ~BaselineDetect() { 244 for (auto block : blocks_) { 245 delete block; 246 } 247 } 248 249 // Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers 250 // block-wise and page-wise data to smooth small blocks/rows, and applies 251 // smoothing based on block/page-level skew and block-level linespacing. 252 void ComputeStraightBaselines(bool use_box_bottoms); 253 254 // Computes the baseline splines for each TO_ROW in each TO_BLOCK and 255 // other associated side-effects, including pre-associating blobs, computing 256 // x-heights and displaying debug information. 257 // NOTE that ComputeStraightBaselines must have been called first as this 258 // sets up data in the TO_ROWs upon which this function depends. 259 void ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines, 260 bool remove_noise, bool show_final_rows, Textord *textord); 261 262 private: 263 // Average (median) skew of the blocks on the page among those that have 264 // a good angle of their own. 265 FCOORD page_skew_; 266 // Amount of debug output to produce. 267 int debug_level_; 268 // The blocks that we are working with. 269 std::vector<BaselineBlock *> blocks_; 270 }; 271 272 } // namespace tesseract 273 274 #endif // TESSERACT_TEXTORD_BASELINEDETECT_H_ 275