1 ///////////////////////////////////////////////////////////////////////
2 // File:        textord.cpp
3 // Description: The top-level text line and word finding functionality.
4 // Author:      Ray Smith
5 // Created:     Fri Mar 13 14:43:01 PDT 2009
6 //
7 // (C) Copyright 2009, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 ///////////////////////////////////////////////////////////////////////
19 
20 // Include automatically generated configuration file if running autoconf.
21 #ifdef HAVE_CONFIG_H
22 #  include "config_auto.h"
23 #endif
24 
25 #include "baselinedetect.h"
26 #include "drawtord.h"
27 #include "makerow.h"
28 #include "pageres.h"
29 #include "textord.h"
30 #include "tordmain.h"
31 #include "wordseg.h"
32 
33 namespace tesseract {
34 
// Constructs a Textord and initialises all of its parameter members.
//
// ccstruct is stored in ccstruct_ and must be non-null: every *_MEMBER entry
// below dereferences it through ccstruct_->params(). Each BOOL_MEMBER,
// INT_MEMBER and double_MEMBER entry is handed, in order, the parameter name,
// its default value, its help string and that params() object. The macros
// themselves are defined outside this file.
//
// NOTE(review): C++ initialises members in class-declaration order, not in the
// order written here, so this list presumably relies on ccstruct_ being
// declared ahead of the parameter members in textord.h -- confirm before
// reordering either.
Textord::Textord(CCStruct *ccstruct)
    : ccstruct_(ccstruct)
    // The CJK fixed-pitch model starts switched off.
    , use_cjk_fp_model_(false)
    ,
    // makerow.cpp ///////////////////////////////////////////
    BOOL_MEMBER(textord_single_height_mode, false, "Script has no xheight, so use a single mode",
                ccstruct_->params())
    ,
    // tospace.cpp ///////////////////////////////////////////
    // NOTE(review): help text is identical to tosp_use_pre_chopping below -- confirm intended.
    BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?", ccstruct_->params())
    , BOOL_MEMBER(tosp_old_to_constrain_sp_kn, false,
                  "Constrain relative values of inter and intra-word gaps for "
                  "old_to_method.",
                  ccstruct_->params())
    // NOTE(review): the name says "prop rows" but the help text says "fixed pitch rows" -- confirm.
    , BOOL_MEMBER(tosp_only_use_prop_rows, true, "Block stats to use fixed pitch rows?",
                  ccstruct_->params())
    , BOOL_MEMBER(tosp_force_wordbreak_on_punct, false,
                  "Force word breaks on punct to break long lines in non-space "
                  "delimited langs",
                  ccstruct_->params())
    , BOOL_MEMBER(tosp_use_pre_chopping, false, "Space stats use prechopping?", ccstruct_->params())
    , BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code", ccstruct_->params())
    // The next four flags all share the same help string.
    , BOOL_MEMBER(tosp_block_use_cert_spaces, true, "Only stat OBVIOUS spaces", ccstruct_->params())
    , BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces", ccstruct_->params())
    , BOOL_MEMBER(tosp_narrow_blobs_not_cert, true, "Only stat OBVIOUS spaces", ccstruct_->params())
    , BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces", ccstruct_->params())
    , BOOL_MEMBER(tosp_recovery_isolated_row_stats, true,
                  "Use row alone when inadequate cert spaces", ccstruct_->params())
    , BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess", ccstruct_->params())
    , BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?", ccstruct_->params())
    , BOOL_MEMBER(tosp_fuzzy_limit_all, true, "Don't restrict kn->sp fuzzy limit to tables",
                  ccstruct_->params())
    , BOOL_MEMBER(tosp_stats_use_xht_gaps, true, "Use within xht gap for wd breaks",
                  ccstruct_->params())
    , BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks", ccstruct_->params())
    , BOOL_MEMBER(tosp_only_use_xht_gaps, false, "Only use within xht gap for wd breaks",
                  ccstruct_->params())
    , BOOL_MEMBER(tosp_rule_9_test_punct, false, "Don't chng kn to space next to punct",
                  ccstruct_->params())
    , BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip", ccstruct_->params())
    , BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip", ccstruct_->params())
    , BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic", ccstruct_->params())
    , INT_MEMBER(tosp_debug_level, 0, "Debug data", ccstruct_->params())
    , INT_MEMBER(tosp_enough_space_samples_for_median, 3, "or should we use mean",
                 ccstruct_->params())
    , INT_MEMBER(tosp_redo_kern_limit, 10, "No.samples reqd to reestimate for row",
                 ccstruct_->params())
    , INT_MEMBER(tosp_few_samples, 40, "No.gaps reqd with 1 large gap to treat as a table",
                 ccstruct_->params())
    , INT_MEMBER(tosp_short_row, 20, "No.gaps reqd with few cert spaces to use certs",
                 ccstruct_->params())
    , INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly", ccstruct_->params())
    , double_MEMBER(tosp_old_sp_kn_th_factor, 2.0,
                    "Factor for defining space threshold in terms of space and "
                    "kern sizes",
                    ccstruct_->params())
    , double_MEMBER(tosp_threshold_bias1, 0, "how far between kern and space?", ccstruct_->params())
    , double_MEMBER(tosp_threshold_bias2, 0, "how far between kern and space?", ccstruct_->params())
    , double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow", ccstruct_->params())
    , double_MEMBER(tosp_narrow_aspect_ratio, 0.48, "narrow if w/h less than this",
                    ccstruct_->params())
    , double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide", ccstruct_->params())
    // NOTE(review): help text says "less than", mirroring the narrow case above -- confirm wording.
    , double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this", ccstruct_->params())
    , double_MEMBER(tosp_fuzzy_space_factor, 0.6, "Fract of xheight for fuzz sp",
                    ccstruct_->params())
    , double_MEMBER(tosp_fuzzy_space_factor1, 0.5, "Fract of xheight for fuzz sp",
                    ccstruct_->params())
    , double_MEMBER(tosp_fuzzy_space_factor2, 0.72, "Fract of xheight for fuzz sp",
                    ccstruct_->params())
    , double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern", ccstruct_->params())
    , double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp", ccstruct_->params())
    , double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp", ccstruct_->params())
    , double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp", ccstruct_->params())
    , double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier", ccstruct_->params())
    , double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier", ccstruct_->params())
    , double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space", ccstruct_->params())
    , double_MEMBER(tosp_enough_small_gaps, 0.65, "Fract of kerns reqd for isolated row stats",
                    ccstruct_->params())
    , double_MEMBER(tosp_table_kn_sp_ratio, 2.25, "Min difference of kn & sp in table",
                    ccstruct_->params())
    , double_MEMBER(tosp_table_xht_sp_ratio, 0.33, "Expect spaces bigger than this",
                    ccstruct_->params())
    , double_MEMBER(tosp_table_fuzzy_kn_sp_ratio, 3.0, "Fuzzy if less than this",
                    ccstruct_->params())
    , double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg", ccstruct_->params())
    , double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg", ccstruct_->params())
    , double_MEMBER(tosp_min_sane_kn_sp, 1.5, "Don't trust spaces less than this time kn",
                    ccstruct_->params())
    , double_MEMBER(tosp_init_guess_kn_mult, 2.2, "Thresh guess - mult kn by this",
                    ccstruct_->params())
    , double_MEMBER(tosp_init_guess_xht_mult, 0.28, "Thresh guess - mult xht by this",
                    ccstruct_->params())
    , double_MEMBER(tosp_max_sane_kn_thresh, 5.0, "Multiplier on kn to limit thresh",
                    ccstruct_->params())
    , double_MEMBER(tosp_flip_caution, 0.0, "Don't autoflip kn to sp when large separation",
                    ccstruct_->params())
    , double_MEMBER(tosp_large_kerning, 0.19, "Limit use of xht gap with large kns",
                    ccstruct_->params())
    , double_MEMBER(tosp_dont_fool_with_small_kerns, -1, "Limit use of xht gap with odd small kns",
                    ccstruct_->params())
    , double_MEMBER(tosp_near_lh_edge, 0, "Don't reduce box if the top left is non blank",
                    ccstruct_->params())
    , double_MEMBER(tosp_silly_kn_sp_gap, 0.2, "Don't let sp minus kn get too small",
                    ccstruct_->params())
    , double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75, "How wide fuzzies need context",
                    ccstruct_->params())
    ,
    // tordmain.cpp ///////////////////////////////////////////
    BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs", ccstruct_->params())
    , BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs", ccstruct_->params())
    // NOTE(review): help text is identical to textord_show_blobs above -- confirm intended.
    , BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs", ccstruct_->params())
    , INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise", ccstruct_->params())
    , INT_MEMBER(textord_baseline_debug, 0, "Baseline debug level", ccstruct_->params())
    , double_MEMBER(textord_noise_area_ratio, 0.7, "Fraction of bounding box for noise",
                    ccstruct_->params())
    , double_MEMBER(textord_initialx_ile, 0.75, "Ile of sizes for xheight guess",
                    ccstruct_->params())
    // NOTE(review): same help text as textord_initialx_ile although the name refers to "asc" --
    // confirm.
    , double_MEMBER(textord_initialasc_ile, 0.90, "Ile of sizes for xheight guess",
                    ccstruct_->params())
    , INT_MEMBER(textord_noise_sizefraction, 10, "Fraction of size for maxima", ccstruct_->params())
    , double_MEMBER(textord_noise_sizelimit, 0.5, "Fraction of x for big t count",
                    ccstruct_->params())
    , INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob", ccstruct_->params())
    , double_MEMBER(textord_noise_normratio, 2.0, "Dot to norm ratio for deletion",
                    ccstruct_->params())
    , BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words", ccstruct_->params())
    , BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows", ccstruct_->params())
    , double_MEMBER(textord_noise_syfract, 0.2, "xh fract height error for norm blobs",
                    ccstruct_->params())
    , double_MEMBER(textord_noise_sxfract, 0.4, "xh fract width error for norm blobs",
                    ccstruct_->params())
    , double_MEMBER(textord_noise_hfract, 1.0 / 64,
                    "Height fraction to discard outlines as speckle noise", ccstruct_->params())
    , INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row", ccstruct_->params())
    // NOTE(review): help text is identical to textord_noise_normratio above -- confirm intended.
    , double_MEMBER(textord_noise_rowratio, 6.0, "Dot to norm ratio for deletion",
                    ccstruct_->params())
    , BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector", ccstruct_->params())
    , double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift", ccstruct_->params())
    , double_MEMBER(textord_blshift_xfraction, 9.99, "Min size of baseline shift",
                    ccstruct_->params()) {}
175 
176 // Make the textlines and words inside each block.
TextordPage(PageSegMode pageseg_mode,const FCOORD & reskew,int width,int height,Image binary_pix,Image thresholds_pix,Image grey_pix,bool use_box_bottoms,BLOBNBOX_LIST * diacritic_blobs,BLOCK_LIST * blocks,TO_BLOCK_LIST * to_blocks)177 void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height,
178                           Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms,
179                           BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks,
180                           TO_BLOCK_LIST *to_blocks) {
181   page_tr_.set_x(width);
182   page_tr_.set_y(height);
183   if (to_blocks->empty()) {
184     // AutoPageSeg was not used, so we need to find_components first.
185     find_components(binary_pix, blocks, to_blocks);
186     TO_BLOCK_IT it(to_blocks);
187     for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
188       TO_BLOCK *to_block = it.data();
189       // Compute the edge offsets whether or not there is a grey_pix.
190       // We have by-passed auto page seg, so we have to run it here.
191       // By page segmentation mode there is no non-text to avoid running on.
192       to_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);
193     }
194   } else if (!PSM_SPARSE(pageseg_mode)) {
195     // AutoPageSeg does not need to find_components as it did that already.
196     // Filter_blobs sets up the TO_BLOCKs the same as find_components does.
197     filter_blobs(page_tr_, to_blocks, true);
198   }
199 
200   ASSERT_HOST(!to_blocks->empty());
201   if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) {
202     const FCOORD anticlockwise90(0.0f, 1.0f);
203     const FCOORD clockwise90(0.0f, -1.0f);
204     TO_BLOCK_IT it(to_blocks);
205     for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
206       TO_BLOCK *to_block = it.data();
207       BLOCK *block = to_block->block;
208       // Create a fake poly_block in block from its bounding box.
209       block->pdblk.set_poly_block(new POLY_BLOCK(block->pdblk.bounding_box(), PT_VERTICAL_TEXT));
210       // Rotate the to_block along with its contained block and blobnbox lists.
211       to_block->rotate(anticlockwise90);
212       // Set the block's rotation values to obey the convention followed in
213       // layout analysis for vertical text.
214       block->set_re_rotation(clockwise90);
215       block->set_classify_rotation(clockwise90);
216     }
217   }
218 
219   TO_BLOCK_IT to_block_it(to_blocks);
220   TO_BLOCK *to_block = to_block_it.data();
221   // Make the rows in the block.
222   float gradient;
223   // Do it the old fashioned way.
224   if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {
225     gradient = make_rows(page_tr_, to_blocks);
226   } else if (!PSM_SPARSE(pageseg_mode)) {
227     // RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
228     gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE, to_block, to_blocks);
229   } else {
230     gradient = 0.0f;
231   }
232   BaselineDetect baseline_detector(textord_baseline_debug, reskew, to_blocks);
233   baseline_detector.ComputeStraightBaselines(use_box_bottoms);
234   baseline_detector.ComputeBaselineSplinesAndXheights(
235       page_tr_, pageseg_mode != PSM_RAW_LINE, textord_heavy_nr, textord_show_final_rows, this);
236   // Now make the words in the lines.
237   if (PSM_WORD_FIND_ENABLED(pageseg_mode)) {
238     // SINGLE_LINE uses the old word maker on the single line.
239     make_words(this, page_tr_, gradient, blocks, to_blocks);
240   } else {
241     // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
242     // single word, and in SINGLE_CHAR mode, all the outlines
243     // go in a single blob.
244     TO_BLOCK *to_block = to_block_it.data();
245     make_single_word(pageseg_mode == PSM_SINGLE_CHAR, to_block->get_rows(),
246                      to_block->block->row_list());
247   }
248   // Remove empties.
249   cleanup_blocks(PSM_WORD_FIND_ENABLED(pageseg_mode), blocks);
250   TransferDiacriticsToBlockGroups(diacritic_blobs, blocks);
251   // Compute the margins for each row in the block, to be used later for
252   // paragraph detection.
253   BLOCK_IT b_it(blocks);
254   for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
255     b_it.data()->compute_row_margins();
256   }
257 #ifndef GRAPHICS_DISABLED
258   close_to_win();
259 #endif
260 }
261 
262 // If we were supposed to return only a single textline, and there is more
263 // than one, clean up and leave only the best.
CleanupSingleRowResult(PageSegMode pageseg_mode,PAGE_RES * page_res)264 void Textord::CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES *page_res) {
265   if (PSM_LINE_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode)) {
266     return; // No cleanup required.
267   }
268   PAGE_RES_IT it(page_res);
269   // Find the best row, being the greatest mean word conf.
270   float row_total_conf = 0.0f;
271   int row_word_count = 0;
272   ROW_RES *best_row = nullptr;
273   float best_conf = 0.0f;
274   for (it.restart_page(); it.word() != nullptr; it.forward()) {
275     WERD_RES *word = it.word();
276     row_total_conf += word->best_choice->certainty();
277     ++row_word_count;
278     if (it.next_row() != it.row()) {
279       row_total_conf /= row_word_count;
280       if (best_row == nullptr || best_conf < row_total_conf) {
281         best_row = it.row();
282         best_conf = row_total_conf;
283       }
284       row_total_conf = 0.0f;
285       row_word_count = 0;
286     }
287   }
288   // Now eliminate any word not in the best row.
289   for (it.restart_page(); it.word() != nullptr; it.forward()) {
290     if (it.row() != best_row) {
291       it.DeleteCurrentWord();
292     }
293   }
294 }
295 
296 } // namespace tesseract.
297