///////////////////////////////////////////////////////////////////////
// File:        textord.h
// Description: The Textord class definition gathers text line and word
//              finding functionality.
// Author:      Ray Smith
// Created:     Fri Mar 13 14:29:01 PDT 2009
//
// (C) Copyright 2009, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
20 
21 #ifndef TESSERACT_TEXTORD_TEXTORD_H_
22 #define TESSERACT_TEXTORD_TEXTORD_H_
23 
24 #include "bbgrid.h"
25 #include "blobbox.h"
26 #include "ccstruct.h"
27 #include "gap_map.h"
28 
29 #include <tesseract/publictypes.h> // For PageSegMode.
30 
31 namespace tesseract {
32 
// Forward declarations of types that the declarations in this header only
// name in parameter lists (by pointer, by reference, or as a by-value
// parameter of a function that is merely declared here).
class FCOORD;
class BLOCK_LIST;
class PAGE_RES;
class TO_BLOCK;
class TO_BLOCK_LIST;
class ScrollView;
39 
40 // A simple class that can be used by BBGrid to hold a word and an expanded
41 // bounding box that makes it easy to find words to put diacritics.
42 class WordWithBox {
43 public:
WordWithBox()44   WordWithBox() : word_(nullptr) {}
WordWithBox(WERD * word)45   explicit WordWithBox(WERD *word) : word_(word), bounding_box_(word->bounding_box()) {
46     int height = bounding_box_.height();
47     bounding_box_.pad(height, height);
48   }
49 
bounding_box()50   const TBOX &bounding_box() const {
51     return bounding_box_;
52   }
53   // Returns the bounding box of only the good blobs.
true_bounding_box()54   TBOX true_bounding_box() const {
55     return word_->true_bounding_box();
56   }
RejBlobs()57   C_BLOB_LIST *RejBlobs() const {
58     return word_->rej_cblob_list();
59   }
word()60   const WERD *word() const {
61     return word_;
62   }
63 
64 private:
65   // Borrowed pointer to a real word somewhere that must outlive this class.
66   WERD *word_;
67   // Cached expanded bounding box of the word, padded all round by its height.
68   TBOX bounding_box_;
69 };
70 
71 // Make it usable by BBGrid.
72 CLISTIZEH(WordWithBox)
73 using WordGrid = BBGrid<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>;
74 using WordSearch = GridSearch<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>;
75 
76 class Textord {
77 public:
78   explicit Textord(CCStruct *ccstruct);
79   ~Textord() = default;
80 
81   // Make the textlines and words inside each block.
82   // binary_pix is mandatory and is the binarized input after line removal.
83   // grey_pix is optional, but if present must match the binary_pix in size,
84   // and must be a *real* grey image instead of binary_pix * 255.
85   // thresholds_pix is expected to be present iff grey_pix is present and
86   // can be an integer factor reduction of the grey_pix. It represents the
87   // thresholds that were used to create the binary_pix from the grey_pix.
88   // diacritic_blobs contain small confusing components that should be added
89   // to the appropriate word(s) in case they are really diacritics.
90   void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height,
91                    Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms,
92                    BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
93 
94   // If we were supposed to return only a single textline, and there is more
95   // than one, clean up and leave only the best.
96   void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES *page_res);
97 
use_cjk_fp_model()98   bool use_cjk_fp_model() const {
99     return use_cjk_fp_model_;
100   }
set_use_cjk_fp_model(bool flag)101   void set_use_cjk_fp_model(bool flag) {
102     use_cjk_fp_model_ = flag;
103   }
104 
105   // tospace.cpp ///////////////////////////////////////////
106   void to_spacing(ICOORD page_tr,       // topright of page
107                   TO_BLOCK_LIST *blocks // blocks on page
108   );
109   ROW *make_prop_words(TO_ROW *row,    // row to make
110                        FCOORD rotation // for drawing
111   );
112   ROW *make_blob_words(TO_ROW *row,    // row to make
113                        FCOORD rotation // for drawing
114   );
115   // tordmain.cpp ///////////////////////////////////////////
116   void find_components(Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
117   void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on);
118 
119 private:
120   // For underlying memory management and other utilities.
121   CCStruct *ccstruct_;
122 
123   // The size of the input image.
124   ICOORD page_tr_;
125 
126   bool use_cjk_fp_model_;
127 
128   // makerow.cpp ///////////////////////////////////////////
129   // Make the textlines inside each block.
130   void MakeRows(PageSegMode pageseg_mode, const FCOORD &skew, int width, int height,
131                 TO_BLOCK_LIST *to_blocks);
132   // Make the textlines inside a single block.
133   void MakeBlockRows(int min_spacing, int max_spacing, const FCOORD &skew, TO_BLOCK *block,
134                      ScrollView *win);
135 
136 public:
137   void compute_block_xheight(TO_BLOCK *block, float gradient);
138   void compute_row_xheight(TO_ROW *row, // row to do
139                            const FCOORD &rotation,
140                            float gradient, // global skew
141                            int block_line_size);
142   void make_spline_rows(TO_BLOCK *block, // block to do
143                         float gradient,  // gradient to fit
144                         bool testing_on);
145 
146 private:
147   //// oldbasel.cpp ////////////////////////////////////////
148   void make_old_baselines(TO_BLOCK *block, // block to do
149                           bool testing_on, // correct orientation
150                           float gradient);
151   void correlate_lines(TO_BLOCK *block, float gradient);
152   void correlate_neighbours(TO_BLOCK *block, // block rows are in.
153                             TO_ROW **rows,   // rows of block.
154                             int rowcount);   // no of rows to do.
155   int correlate_with_stats(TO_ROW **rows,    // rows of block.
156                            int rowcount,     // no of rows to do.
157                            TO_BLOCK *block);
158   void find_textlines(TO_BLOCK *block,  // block row is in
159                       TO_ROW *row,      // row to do
160                       int degree,       // required approximation
161                       QSPLINE *spline); // starting spline
162   // tospace.cpp ///////////////////////////////////////////
163   // DEBUG USE ONLY
164   void block_spacing_stats(TO_BLOCK *block, GAPMAP *gapmap, bool &old_text_ord_proportional,
165                            // resulting estimate
166                            int16_t &block_space_gap_width,
167                            // resulting estimate
168                            int16_t &block_non_space_gap_width);
169   void row_spacing_stats(TO_ROW *row, GAPMAP *gapmap, int16_t block_idx, int16_t row_idx,
170                          // estimate for block
171                          int16_t block_space_gap_width,
172                          // estimate for block
173                          int16_t block_non_space_gap_width);
174   void old_to_method(TO_ROW *row, STATS *all_gap_stats, STATS *space_gap_stats,
175                      STATS *small_gap_stats, int16_t block_space_gap_width,
176                      // estimate for block
177                      int16_t block_non_space_gap_width);
178   bool isolated_row_stats(TO_ROW *row, GAPMAP *gapmap, STATS *all_gap_stats, bool suspected_table,
179                           int16_t block_idx, int16_t row_idx);
180   int16_t stats_count_under(STATS *stats, int16_t threshold);
181   void improve_row_threshold(TO_ROW *row, STATS *all_gap_stats);
182   bool make_a_word_break(TO_ROW *row,   // row being made
183                          TBOX blob_box, // for next_blob // how many blanks?
184                          int16_t prev_gap, TBOX prev_blob_box, int16_t real_current_gap,
185                          int16_t within_xht_current_gap, TBOX next_blob_box, int16_t next_gap,
186                          uint8_t &blanks, bool &fuzzy_sp, bool &fuzzy_non,
187                          bool &prev_gap_was_a_space, bool &break_at_next_gap);
188   bool narrow_blob(TO_ROW *row, TBOX blob_box);
189   bool wide_blob(TO_ROW *row, TBOX blob_box);
190   bool suspected_punct_blob(TO_ROW *row, TBOX box);
191   void peek_at_next_gap(TO_ROW *row, BLOBNBOX_IT box_it, TBOX &next_blob_box, int16_t &next_gap,
192                         int16_t &next_within_xht_gap);
193   void mark_gap(TBOX blob,    // blob following gap
194                 int16_t rule, // heuristic id
195                 int16_t prev_gap, int16_t prev_blob_width, int16_t current_gap,
196                 int16_t next_blob_width, int16_t next_gap);
197   float find_mean_blob_spacing(WERD *word);
198   bool ignore_big_gap(TO_ROW *row, int32_t row_length, GAPMAP *gapmap, int16_t left, int16_t right);
199   // get bounding box
200   TBOX reduced_box_next(TO_ROW *row,    // current row
201                         BLOBNBOX_IT *it // iterator to blobds
202   );
203   TBOX reduced_box_for_blob(BLOBNBOX *blob, TO_ROW *row, int16_t *left_above_xht);
204   // tordmain.cpp ///////////////////////////////////////////
205   float filter_noise_blobs(BLOBNBOX_LIST *src_list, BLOBNBOX_LIST *noise_list,
206                            BLOBNBOX_LIST *small_list, BLOBNBOX_LIST *large_list);
207   // Fixes the block so it obeys all the rules:
208   // Must have at least one ROW.
209   // Must have at least one WERD.
210   // WERDs contain a fake blob.
211   void cleanup_nontext_block(BLOCK *block);
212   void cleanup_blocks(bool clean_noise, BLOCK_LIST *blocks);
213   bool clean_noise_from_row(ROW *row);
214   void clean_noise_from_words(ROW *row);
215   // Remove outlines that are a tiny fraction in either width or height
216   // of the word height.
217   void clean_small_noise_from_words(ROW *row);
218   // Groups blocks by rotation, then, for each group, makes a WordGrid and calls
219   // TransferDiacriticsToWords to copy the diacritic blobs to the most
220   // appropriate words in the group of blocks. Source blobs are not touched.
221   void TransferDiacriticsToBlockGroups(BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks);
222   // Places a copy of blobs that are near a word (after applying rotation to the
223   // blob) in the most appropriate word, unless there is doubt, in which case a
224   // blob can end up in two words. Source blobs are not touched.
225   void TransferDiacriticsToWords(BLOBNBOX_LIST *diacritic_blobs, const FCOORD &rotation,
226                                  WordGrid *word_grid);
227 
228 public:
229   // makerow.cpp ///////////////////////////////////////////
230   BOOL_VAR_H(textord_single_height_mode);
231   // tospace.cpp ///////////////////////////////////////////
232   BOOL_VAR_H(tosp_old_to_method);
233   BOOL_VAR_H(tosp_old_to_constrain_sp_kn);
234   BOOL_VAR_H(tosp_only_use_prop_rows);
235   BOOL_VAR_H(tosp_force_wordbreak_on_punct);
236   BOOL_VAR_H(tosp_use_pre_chopping);
237   BOOL_VAR_H(tosp_old_to_bug_fix);
238   BOOL_VAR_H(tosp_block_use_cert_spaces);
239   BOOL_VAR_H(tosp_row_use_cert_spaces);
240   BOOL_VAR_H(tosp_narrow_blobs_not_cert);
241   BOOL_VAR_H(tosp_row_use_cert_spaces1);
242   BOOL_VAR_H(tosp_recovery_isolated_row_stats);
243   BOOL_VAR_H(tosp_only_small_gaps_for_kern);
244   BOOL_VAR_H(tosp_all_flips_fuzzy);
245   BOOL_VAR_H(tosp_fuzzy_limit_all);
246   BOOL_VAR_H(tosp_stats_use_xht_gaps);
247   BOOL_VAR_H(tosp_use_xht_gaps);
248   BOOL_VAR_H(tosp_only_use_xht_gaps);
249   BOOL_VAR_H(tosp_rule_9_test_punct);
250   BOOL_VAR_H(tosp_flip_fuzz_kn_to_sp);
251   BOOL_VAR_H(tosp_flip_fuzz_sp_to_kn);
252   BOOL_VAR_H(tosp_improve_thresh);
253   INT_VAR_H(tosp_debug_level);
254   INT_VAR_H(tosp_enough_space_samples_for_median);
255   INT_VAR_H(tosp_redo_kern_limit);
256   INT_VAR_H(tosp_few_samples);
257   INT_VAR_H(tosp_short_row);
258   INT_VAR_H(tosp_sanity_method);
259   double_VAR_H(tosp_old_sp_kn_th_factor);
260   double_VAR_H(tosp_threshold_bias1);
261   double_VAR_H(tosp_threshold_bias2);
262   double_VAR_H(tosp_narrow_fraction);
263   double_VAR_H(tosp_narrow_aspect_ratio);
264   double_VAR_H(tosp_wide_fraction);
265   double_VAR_H(tosp_wide_aspect_ratio);
266   double_VAR_H(tosp_fuzzy_space_factor);
267   double_VAR_H(tosp_fuzzy_space_factor1);
268   double_VAR_H(tosp_fuzzy_space_factor2);
269   double_VAR_H(tosp_gap_factor);
270   double_VAR_H(tosp_kern_gap_factor1);
271   double_VAR_H(tosp_kern_gap_factor2);
272   double_VAR_H(tosp_kern_gap_factor3);
273   double_VAR_H(tosp_ignore_big_gaps);
274   double_VAR_H(tosp_ignore_very_big_gaps);
275   double_VAR_H(tosp_rep_space);
276   double_VAR_H(tosp_enough_small_gaps);
277   double_VAR_H(tosp_table_kn_sp_ratio);
278   double_VAR_H(tosp_table_xht_sp_ratio);
279   double_VAR_H(tosp_table_fuzzy_kn_sp_ratio);
280   double_VAR_H(tosp_fuzzy_kn_fraction);
281   double_VAR_H(tosp_fuzzy_sp_fraction);
282   double_VAR_H(tosp_min_sane_kn_sp);
283   double_VAR_H(tosp_init_guess_kn_mult);
284   double_VAR_H(tosp_init_guess_xht_mult);
285   double_VAR_H(tosp_max_sane_kn_thresh);
286   double_VAR_H(tosp_flip_caution);
287   double_VAR_H(tosp_large_kerning);
288   double_VAR_H(tosp_dont_fool_with_small_kerns);
289   double_VAR_H(tosp_near_lh_edge);
290   double_VAR_H(tosp_silly_kn_sp_gap);
291   double_VAR_H(tosp_pass_wide_fuzz_sp_to_context);
292   // tordmain.cpp ///////////////////////////////////////////
293   BOOL_VAR_H(textord_no_rejects);
294   BOOL_VAR_H(textord_show_blobs);
295   BOOL_VAR_H(textord_show_boxes);
296   INT_VAR_H(textord_max_noise_size);
297   INT_VAR_H(textord_baseline_debug);
298   double_VAR_H(textord_noise_area_ratio);
299   double_VAR_H(textord_initialx_ile);
300   double_VAR_H(textord_initialasc_ile);
301   INT_VAR_H(textord_noise_sizefraction);
302   double_VAR_H(textord_noise_sizelimit);
303   INT_VAR_H(textord_noise_translimit);
304   double_VAR_H(textord_noise_normratio);
305   BOOL_VAR_H(textord_noise_rejwords);
306   BOOL_VAR_H(textord_noise_rejrows);
307   double_VAR_H(textord_noise_syfract);
308   double_VAR_H(textord_noise_sxfract);
309   double_VAR_H(textord_noise_hfract);
310   INT_VAR_H(textord_noise_sncount);
311   double_VAR_H(textord_noise_rowratio);
312   BOOL_VAR_H(textord_noise_debug);
313   double_VAR_H(textord_blshift_maxshift);
314   double_VAR_H(textord_blshift_xfraction);
315 };
316 
317 } // namespace tesseract
318 
319 #endif // TESSERACT_TEXTORD_TEXTORD_H_
320