1 /////////////////////////////////////////////////////////////////////// 2 // File: textord.h 3 // Description: The Textord class definition gathers text line and word 4 // finding functionality. 5 // Author: Ray Smith 6 // Created: Fri Mar 13 14:29:01 PDT 2009 7 // 8 // (C) Copyright 2009, Google Inc. 9 // Licensed under the Apache License, Version 2.0 (the "License"); 10 // you may not use this file except in compliance with the License. 11 // You may obtain a copy of the License at 12 // http://www.apache.org/licenses/LICENSE-2.0 13 // Unless required by applicable law or agreed to in writing, software 14 // distributed under the License is distributed on an "AS IS" BASIS, 15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 // See the License for the specific language governing permissions and 17 // limitations under the License. 18 // 19 /////////////////////////////////////////////////////////////////////// 20 21 #ifndef TESSERACT_TEXTORD_TEXTORD_H_ 22 #define TESSERACT_TEXTORD_TEXTORD_H_ 23 24 #include "bbgrid.h" 25 #include "blobbox.h" 26 #include "ccstruct.h" 27 #include "gap_map.h" 28 29 #include <tesseract/publictypes.h> // For PageSegMode. 30 31 namespace tesseract { 32 33 class FCOORD; 34 class BLOCK_LIST; 35 class PAGE_RES; 36 class TO_BLOCK; 37 class TO_BLOCK_LIST; 38 class ScrollView; 39 40 // A simple class that can be used by BBGrid to hold a word and an expanded 41 // bounding box that makes it easy to find words to put diacritics. 42 class WordWithBox { 43 public: WordWithBox()44 WordWithBox() : word_(nullptr) {} WordWithBox(WERD * word)45 explicit WordWithBox(WERD *word) : word_(word), bounding_box_(word->bounding_box()) { 46 int height = bounding_box_.height(); 47 bounding_box_.pad(height, height); 48 } 49 bounding_box()50 const TBOX &bounding_box() const { 51 return bounding_box_; 52 } 53 // Returns the bounding box of only the good blobs. true_bounding_box()54 TBOX true_bounding_box() const { 55 return word_->true_bounding_box(); 56 } RejBlobs()57 C_BLOB_LIST *RejBlobs() const { 58 return word_->rej_cblob_list(); 59 } word()60 const WERD *word() const { 61 return word_; 62 } 63 64 private: 65 // Borrowed pointer to a real word somewhere that must outlive this class. 66 WERD *word_; 67 // Cached expanded bounding box of the word, padded all round by its height. 68 TBOX bounding_box_; 69 }; 70 71 // Make it usable by BBGrid. 72 CLISTIZEH(WordWithBox) 73 using WordGrid = BBGrid<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>; 74 using WordSearch = GridSearch<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>; 75 76 class Textord { 77 public: 78 explicit Textord(CCStruct *ccstruct); 79 ~Textord() = default; 80 81 // Make the textlines and words inside each block. 82 // binary_pix is mandatory and is the binarized input after line removal. 83 // grey_pix is optional, but if present must match the binary_pix in size, 84 // and must be a *real* grey image instead of binary_pix * 255. 85 // thresholds_pix is expected to be present iff grey_pix is present and 86 // can be an integer factor reduction of the grey_pix. It represents the 87 // thresholds that were used to create the binary_pix from the grey_pix. 88 // diacritic_blobs contain small confusing components that should be added 89 // to the appropriate word(s) in case they are really diacritics. 90 void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, 91 Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms, 92 BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks); 93 94 // If we were supposed to return only a single textline, and there is more 95 // than one, clean up and leave only the best. 96 void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES *page_res); 97 use_cjk_fp_model()98 bool use_cjk_fp_model() const { 99 return use_cjk_fp_model_; 100 } set_use_cjk_fp_model(bool flag)101 void set_use_cjk_fp_model(bool flag) { 102 use_cjk_fp_model_ = flag; 103 } 104 105 // tospace.cpp /////////////////////////////////////////// 106 void to_spacing(ICOORD page_tr, // topright of page 107 TO_BLOCK_LIST *blocks // blocks on page 108 ); 109 ROW *make_prop_words(TO_ROW *row, // row to make 110 FCOORD rotation // for drawing 111 ); 112 ROW *make_blob_words(TO_ROW *row, // row to make 113 FCOORD rotation // for drawing 114 ); 115 // tordmain.cpp /////////////////////////////////////////// 116 void find_components(Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks); 117 void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on); 118 119 private: 120 // For underlying memory management and other utilities. 121 CCStruct *ccstruct_; 122 123 // The size of the input image. 124 ICOORD page_tr_; 125 126 bool use_cjk_fp_model_; 127 128 // makerow.cpp /////////////////////////////////////////// 129 // Make the textlines inside each block. 130 void MakeRows(PageSegMode pageseg_mode, const FCOORD &skew, int width, int height, 131 TO_BLOCK_LIST *to_blocks); 132 // Make the textlines inside a single block. 133 void MakeBlockRows(int min_spacing, int max_spacing, const FCOORD &skew, TO_BLOCK *block, 134 ScrollView *win); 135 136 public: 137 void compute_block_xheight(TO_BLOCK *block, float gradient); 138 void compute_row_xheight(TO_ROW *row, // row to do 139 const FCOORD &rotation, 140 float gradient, // global skew 141 int block_line_size); 142 void make_spline_rows(TO_BLOCK *block, // block to do 143 float gradient, // gradient to fit 144 bool testing_on); 145 146 private: 147 //// oldbasel.cpp //////////////////////////////////////// 148 void make_old_baselines(TO_BLOCK *block, // block to do 149 bool testing_on, // correct orientation 150 float gradient); 151 void correlate_lines(TO_BLOCK *block, float gradient); 152 void correlate_neighbours(TO_BLOCK *block, // block rows are in. 153 TO_ROW **rows, // rows of block. 154 int rowcount); // no of rows to do. 155 int correlate_with_stats(TO_ROW **rows, // rows of block. 156 int rowcount, // no of rows to do. 157 TO_BLOCK *block); 158 void find_textlines(TO_BLOCK *block, // block row is in 159 TO_ROW *row, // row to do 160 int degree, // required approximation 161 QSPLINE *spline); // starting spline 162 // tospace.cpp /////////////////////////////////////////// 163 // DEBUG USE ONLY 164 void block_spacing_stats(TO_BLOCK *block, GAPMAP *gapmap, bool &old_text_ord_proportional, 165 // resulting estimate 166 int16_t &block_space_gap_width, 167 // resulting estimate 168 int16_t &block_non_space_gap_width); 169 void row_spacing_stats(TO_ROW *row, GAPMAP *gapmap, int16_t block_idx, int16_t row_idx, 170 // estimate for block 171 int16_t block_space_gap_width, 172 // estimate for block 173 int16_t block_non_space_gap_width); 174 void old_to_method(TO_ROW *row, STATS *all_gap_stats, STATS *space_gap_stats, 175 STATS *small_gap_stats, int16_t block_space_gap_width, 176 // estimate for block 177 int16_t block_non_space_gap_width); 178 bool isolated_row_stats(TO_ROW *row, GAPMAP *gapmap, STATS *all_gap_stats, bool suspected_table, 179 int16_t block_idx, int16_t row_idx); 180 int16_t stats_count_under(STATS *stats, int16_t threshold); 181 void improve_row_threshold(TO_ROW *row, STATS *all_gap_stats); 182 bool make_a_word_break(TO_ROW *row, // row being made 183 TBOX blob_box, // for next_blob // how many blanks? 184 int16_t prev_gap, TBOX prev_blob_box, int16_t real_current_gap, 185 int16_t within_xht_current_gap, TBOX next_blob_box, int16_t next_gap, 186 uint8_t &blanks, bool &fuzzy_sp, bool &fuzzy_non, 187 bool &prev_gap_was_a_space, bool &break_at_next_gap); 188 bool narrow_blob(TO_ROW *row, TBOX blob_box); 189 bool wide_blob(TO_ROW *row, TBOX blob_box); 190 bool suspected_punct_blob(TO_ROW *row, TBOX box); 191 void peek_at_next_gap(TO_ROW *row, BLOBNBOX_IT box_it, TBOX &next_blob_box, int16_t &next_gap, 192 int16_t &next_within_xht_gap); 193 void mark_gap(TBOX blob, // blob following gap 194 int16_t rule, // heuristic id 195 int16_t prev_gap, int16_t prev_blob_width, int16_t current_gap, 196 int16_t next_blob_width, int16_t next_gap); 197 float find_mean_blob_spacing(WERD *word); 198 bool ignore_big_gap(TO_ROW *row, int32_t row_length, GAPMAP *gapmap, int16_t left, int16_t right); 199 // get bounding box 200 TBOX reduced_box_next(TO_ROW *row, // current row 201 BLOBNBOX_IT *it // iterator to blobds 202 ); 203 TBOX reduced_box_for_blob(BLOBNBOX *blob, TO_ROW *row, int16_t *left_above_xht); 204 // tordmain.cpp /////////////////////////////////////////// 205 float filter_noise_blobs(BLOBNBOX_LIST *src_list, BLOBNBOX_LIST *noise_list, 206 BLOBNBOX_LIST *small_list, BLOBNBOX_LIST *large_list); 207 // Fixes the block so it obeys all the rules: 208 // Must have at least one ROW. 209 // Must have at least one WERD. 210 // WERDs contain a fake blob. 211 void cleanup_nontext_block(BLOCK *block); 212 void cleanup_blocks(bool clean_noise, BLOCK_LIST *blocks); 213 bool clean_noise_from_row(ROW *row); 214 void clean_noise_from_words(ROW *row); 215 // Remove outlines that are a tiny fraction in either width or height 216 // of the word height. 217 void clean_small_noise_from_words(ROW *row); 218 // Groups blocks by rotation, then, for each group, makes a WordGrid and calls 219 // TransferDiacriticsToWords to copy the diacritic blobs to the most 220 // appropriate words in the group of blocks. Source blobs are not touched. 221 void TransferDiacriticsToBlockGroups(BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks); 222 // Places a copy of blobs that are near a word (after applying rotation to the 223 // blob) in the most appropriate word, unless there is doubt, in which case a 224 // blob can end up in two words. Source blobs are not touched. 225 void TransferDiacriticsToWords(BLOBNBOX_LIST *diacritic_blobs, const FCOORD &rotation, 226 WordGrid *word_grid); 227 228 public: 229 // makerow.cpp /////////////////////////////////////////// 230 BOOL_VAR_H(textord_single_height_mode); 231 // tospace.cpp /////////////////////////////////////////// 232 BOOL_VAR_H(tosp_old_to_method); 233 BOOL_VAR_H(tosp_old_to_constrain_sp_kn); 234 BOOL_VAR_H(tosp_only_use_prop_rows); 235 BOOL_VAR_H(tosp_force_wordbreak_on_punct); 236 BOOL_VAR_H(tosp_use_pre_chopping); 237 BOOL_VAR_H(tosp_old_to_bug_fix); 238 BOOL_VAR_H(tosp_block_use_cert_spaces); 239 BOOL_VAR_H(tosp_row_use_cert_spaces); 240 BOOL_VAR_H(tosp_narrow_blobs_not_cert); 241 BOOL_VAR_H(tosp_row_use_cert_spaces1); 242 BOOL_VAR_H(tosp_recovery_isolated_row_stats); 243 BOOL_VAR_H(tosp_only_small_gaps_for_kern); 244 BOOL_VAR_H(tosp_all_flips_fuzzy); 245 BOOL_VAR_H(tosp_fuzzy_limit_all); 246 BOOL_VAR_H(tosp_stats_use_xht_gaps); 247 BOOL_VAR_H(tosp_use_xht_gaps); 248 BOOL_VAR_H(tosp_only_use_xht_gaps); 249 BOOL_VAR_H(tosp_rule_9_test_punct); 250 BOOL_VAR_H(tosp_flip_fuzz_kn_to_sp); 251 BOOL_VAR_H(tosp_flip_fuzz_sp_to_kn); 252 BOOL_VAR_H(tosp_improve_thresh); 253 INT_VAR_H(tosp_debug_level); 254 INT_VAR_H(tosp_enough_space_samples_for_median); 255 INT_VAR_H(tosp_redo_kern_limit); 256 INT_VAR_H(tosp_few_samples); 257 INT_VAR_H(tosp_short_row); 258 INT_VAR_H(tosp_sanity_method); 259 double_VAR_H(tosp_old_sp_kn_th_factor); 260 double_VAR_H(tosp_threshold_bias1); 261 double_VAR_H(tosp_threshold_bias2); 262 double_VAR_H(tosp_narrow_fraction); 263 double_VAR_H(tosp_narrow_aspect_ratio); 264 double_VAR_H(tosp_wide_fraction); 265 double_VAR_H(tosp_wide_aspect_ratio); 266 double_VAR_H(tosp_fuzzy_space_factor); 267 double_VAR_H(tosp_fuzzy_space_factor1); 268 double_VAR_H(tosp_fuzzy_space_factor2); 269 double_VAR_H(tosp_gap_factor); 270 double_VAR_H(tosp_kern_gap_factor1); 271 double_VAR_H(tosp_kern_gap_factor2); 272 double_VAR_H(tosp_kern_gap_factor3); 273 double_VAR_H(tosp_ignore_big_gaps); 274 double_VAR_H(tosp_ignore_very_big_gaps); 275 double_VAR_H(tosp_rep_space); 276 double_VAR_H(tosp_enough_small_gaps); 277 double_VAR_H(tosp_table_kn_sp_ratio); 278 double_VAR_H(tosp_table_xht_sp_ratio); 279 double_VAR_H(tosp_table_fuzzy_kn_sp_ratio); 280 double_VAR_H(tosp_fuzzy_kn_fraction); 281 double_VAR_H(tosp_fuzzy_sp_fraction); 282 double_VAR_H(tosp_min_sane_kn_sp); 283 double_VAR_H(tosp_init_guess_kn_mult); 284 double_VAR_H(tosp_init_guess_xht_mult); 285 double_VAR_H(tosp_max_sane_kn_thresh); 286 double_VAR_H(tosp_flip_caution); 287 double_VAR_H(tosp_large_kerning); 288 double_VAR_H(tosp_dont_fool_with_small_kerns); 289 double_VAR_H(tosp_near_lh_edge); 290 double_VAR_H(tosp_silly_kn_sp_gap); 291 double_VAR_H(tosp_pass_wide_fuzz_sp_to_context); 292 // tordmain.cpp /////////////////////////////////////////// 293 BOOL_VAR_H(textord_no_rejects); 294 BOOL_VAR_H(textord_show_blobs); 295 BOOL_VAR_H(textord_show_boxes); 296 INT_VAR_H(textord_max_noise_size); 297 INT_VAR_H(textord_baseline_debug); 298 double_VAR_H(textord_noise_area_ratio); 299 double_VAR_H(textord_initialx_ile); 300 double_VAR_H(textord_initialasc_ile); 301 INT_VAR_H(textord_noise_sizefraction); 302 double_VAR_H(textord_noise_sizelimit); 303 INT_VAR_H(textord_noise_translimit); 304 double_VAR_H(textord_noise_normratio); 305 BOOL_VAR_H(textord_noise_rejwords); 306 BOOL_VAR_H(textord_noise_rejrows); 307 double_VAR_H(textord_noise_syfract); 308 double_VAR_H(textord_noise_sxfract); 309 double_VAR_H(textord_noise_hfract); 310 INT_VAR_H(textord_noise_sncount); 311 double_VAR_H(textord_noise_rowratio); 312 BOOL_VAR_H(textord_noise_debug); 313 double_VAR_H(textord_blshift_maxshift); 314 double_VAR_H(textord_blshift_xfraction); 315 }; 316 317 } // namespace tesseract 318 319 #endif // TESSERACT_TEXTORD_TEXTORD_H_ 320