1 ///////////////////////////////////////////////////////////////////////
2 // File: textord.cpp
3 // Description: The top-level text line and word finding functionality.
4 // Author: Ray Smith
5 // Created: Fri Mar 13 14:43:01 PDT 2009
6 //
7 // (C) Copyright 2009, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 ///////////////////////////////////////////////////////////////////////
19
20 // Include automatically generated configuration file if running autoconf.
21 #ifdef HAVE_CONFIG_H
22 # include "config_auto.h"
23 #endif
24
25 #include "baselinedetect.h"
26 #include "drawtord.h"
27 #include "makerow.h"
28 #include "pageres.h"
29 #include "textord.h"
30 #include "tordmain.h"
31 #include "wordseg.h"
32
33 namespace tesseract {
34
Textord(CCStruct * ccstruct)35 Textord::Textord(CCStruct *ccstruct)
36 : ccstruct_(ccstruct)
37 , use_cjk_fp_model_(false)
38 ,
39 // makerow.cpp ///////////////////////////////////////////
40 BOOL_MEMBER(textord_single_height_mode, false, "Script has no xheight, so use a single mode",
41 ccstruct_->params())
42 ,
43 // tospace.cpp ///////////////////////////////////////////
44 BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?", ccstruct_->params())
45 , BOOL_MEMBER(tosp_old_to_constrain_sp_kn, false,
46 "Constrain relative values of inter and intra-word gaps for "
47 "old_to_method.",
48 ccstruct_->params())
49 , BOOL_MEMBER(tosp_only_use_prop_rows, true, "Block stats to use fixed pitch rows?",
50 ccstruct_->params())
51 , BOOL_MEMBER(tosp_force_wordbreak_on_punct, false,
52 "Force word breaks on punct to break long lines in non-space "
53 "delimited langs",
54 ccstruct_->params())
55 , BOOL_MEMBER(tosp_use_pre_chopping, false, "Space stats use prechopping?", ccstruct_->params())
56 , BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code", ccstruct_->params())
57 , BOOL_MEMBER(tosp_block_use_cert_spaces, true, "Only stat OBVIOUS spaces", ccstruct_->params())
58 , BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces", ccstruct_->params())
59 , BOOL_MEMBER(tosp_narrow_blobs_not_cert, true, "Only stat OBVIOUS spaces", ccstruct_->params())
60 , BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces", ccstruct_->params())
61 , BOOL_MEMBER(tosp_recovery_isolated_row_stats, true,
62 "Use row alone when inadequate cert spaces", ccstruct_->params())
63 , BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess", ccstruct_->params())
64 , BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?", ccstruct_->params())
65 , BOOL_MEMBER(tosp_fuzzy_limit_all, true, "Don't restrict kn->sp fuzzy limit to tables",
66 ccstruct_->params())
67 , BOOL_MEMBER(tosp_stats_use_xht_gaps, true, "Use within xht gap for wd breaks",
68 ccstruct_->params())
69 , BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks", ccstruct_->params())
70 , BOOL_MEMBER(tosp_only_use_xht_gaps, false, "Only use within xht gap for wd breaks",
71 ccstruct_->params())
72 , BOOL_MEMBER(tosp_rule_9_test_punct, false, "Don't chng kn to space next to punct",
73 ccstruct_->params())
74 , BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip", ccstruct_->params())
75 , BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip", ccstruct_->params())
76 , BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic", ccstruct_->params())
77 , INT_MEMBER(tosp_debug_level, 0, "Debug data", ccstruct_->params())
78 , INT_MEMBER(tosp_enough_space_samples_for_median, 3, "or should we use mean",
79 ccstruct_->params())
80 , INT_MEMBER(tosp_redo_kern_limit, 10, "No.samples reqd to reestimate for row",
81 ccstruct_->params())
82 , INT_MEMBER(tosp_few_samples, 40, "No.gaps reqd with 1 large gap to treat as a table",
83 ccstruct_->params())
84 , INT_MEMBER(tosp_short_row, 20, "No.gaps reqd with few cert spaces to use certs",
85 ccstruct_->params())
86 , INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly", ccstruct_->params())
87 , double_MEMBER(tosp_old_sp_kn_th_factor, 2.0,
88 "Factor for defining space threshold in terms of space and "
89 "kern sizes",
90 ccstruct_->params())
91 , double_MEMBER(tosp_threshold_bias1, 0, "how far between kern and space?", ccstruct_->params())
92 , double_MEMBER(tosp_threshold_bias2, 0, "how far between kern and space?", ccstruct_->params())
93 , double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow", ccstruct_->params())
94 , double_MEMBER(tosp_narrow_aspect_ratio, 0.48, "narrow if w/h less than this",
95 ccstruct_->params())
96 , double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide", ccstruct_->params())
97 , double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this", ccstruct_->params())
98 , double_MEMBER(tosp_fuzzy_space_factor, 0.6, "Fract of xheight for fuzz sp",
99 ccstruct_->params())
100 , double_MEMBER(tosp_fuzzy_space_factor1, 0.5, "Fract of xheight for fuzz sp",
101 ccstruct_->params())
102 , double_MEMBER(tosp_fuzzy_space_factor2, 0.72, "Fract of xheight for fuzz sp",
103 ccstruct_->params())
104 , double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern", ccstruct_->params())
105 , double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp", ccstruct_->params())
106 , double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp", ccstruct_->params())
107 , double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp", ccstruct_->params())
108 , double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier", ccstruct_->params())
109 , double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier", ccstruct_->params())
110 , double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space", ccstruct_->params())
111 , double_MEMBER(tosp_enough_small_gaps, 0.65, "Fract of kerns reqd for isolated row stats",
112 ccstruct_->params())
113 , double_MEMBER(tosp_table_kn_sp_ratio, 2.25, "Min difference of kn & sp in table",
114 ccstruct_->params())
115 , double_MEMBER(tosp_table_xht_sp_ratio, 0.33, "Expect spaces bigger than this",
116 ccstruct_->params())
117 , double_MEMBER(tosp_table_fuzzy_kn_sp_ratio, 3.0, "Fuzzy if less than this",
118 ccstruct_->params())
119 , double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg", ccstruct_->params())
120 , double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg", ccstruct_->params())
121 , double_MEMBER(tosp_min_sane_kn_sp, 1.5, "Don't trust spaces less than this time kn",
122 ccstruct_->params())
123 , double_MEMBER(tosp_init_guess_kn_mult, 2.2, "Thresh guess - mult kn by this",
124 ccstruct_->params())
125 , double_MEMBER(tosp_init_guess_xht_mult, 0.28, "Thresh guess - mult xht by this",
126 ccstruct_->params())
127 , double_MEMBER(tosp_max_sane_kn_thresh, 5.0, "Multiplier on kn to limit thresh",
128 ccstruct_->params())
129 , double_MEMBER(tosp_flip_caution, 0.0, "Don't autoflip kn to sp when large separation",
130 ccstruct_->params())
131 , double_MEMBER(tosp_large_kerning, 0.19, "Limit use of xht gap with large kns",
132 ccstruct_->params())
133 , double_MEMBER(tosp_dont_fool_with_small_kerns, -1, "Limit use of xht gap with odd small kns",
134 ccstruct_->params())
135 , double_MEMBER(tosp_near_lh_edge, 0, "Don't reduce box if the top left is non blank",
136 ccstruct_->params())
137 , double_MEMBER(tosp_silly_kn_sp_gap, 0.2, "Don't let sp minus kn get too small",
138 ccstruct_->params())
139 , double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75, "How wide fuzzies need context",
140 ccstruct_->params())
141 ,
142 // tordmain.cpp ///////////////////////////////////////////
143 BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs", ccstruct_->params())
144 , BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs", ccstruct_->params())
145 , BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs", ccstruct_->params())
146 , INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise", ccstruct_->params())
147 , INT_MEMBER(textord_baseline_debug, 0, "Baseline debug level", ccstruct_->params())
148 , double_MEMBER(textord_noise_area_ratio, 0.7, "Fraction of bounding box for noise",
149 ccstruct_->params())
150 , double_MEMBER(textord_initialx_ile, 0.75, "Ile of sizes for xheight guess",
151 ccstruct_->params())
152 , double_MEMBER(textord_initialasc_ile, 0.90, "Ile of sizes for xheight guess",
153 ccstruct_->params())
154 , INT_MEMBER(textord_noise_sizefraction, 10, "Fraction of size for maxima", ccstruct_->params())
155 , double_MEMBER(textord_noise_sizelimit, 0.5, "Fraction of x for big t count",
156 ccstruct_->params())
157 , INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob", ccstruct_->params())
158 , double_MEMBER(textord_noise_normratio, 2.0, "Dot to norm ratio for deletion",
159 ccstruct_->params())
160 , BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words", ccstruct_->params())
161 , BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows", ccstruct_->params())
162 , double_MEMBER(textord_noise_syfract, 0.2, "xh fract height error for norm blobs",
163 ccstruct_->params())
164 , double_MEMBER(textord_noise_sxfract, 0.4, "xh fract width error for norm blobs",
165 ccstruct_->params())
166 , double_MEMBER(textord_noise_hfract, 1.0 / 64,
167 "Height fraction to discard outlines as speckle noise", ccstruct_->params())
168 , INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row", ccstruct_->params())
169 , double_MEMBER(textord_noise_rowratio, 6.0, "Dot to norm ratio for deletion",
170 ccstruct_->params())
171 , BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector", ccstruct_->params())
172 , double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift", ccstruct_->params())
173 , double_MEMBER(textord_blshift_xfraction, 9.99, "Min size of baseline shift",
174 ccstruct_->params()) {}
175
176 // Make the textlines and words inside each block.
TextordPage(PageSegMode pageseg_mode,const FCOORD & reskew,int width,int height,Image binary_pix,Image thresholds_pix,Image grey_pix,bool use_box_bottoms,BLOBNBOX_LIST * diacritic_blobs,BLOCK_LIST * blocks,TO_BLOCK_LIST * to_blocks)177 void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height,
178 Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms,
179 BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks,
180 TO_BLOCK_LIST *to_blocks) {
181 page_tr_.set_x(width);
182 page_tr_.set_y(height);
183 if (to_blocks->empty()) {
184 // AutoPageSeg was not used, so we need to find_components first.
185 find_components(binary_pix, blocks, to_blocks);
186 TO_BLOCK_IT it(to_blocks);
187 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
188 TO_BLOCK *to_block = it.data();
189 // Compute the edge offsets whether or not there is a grey_pix.
190 // We have by-passed auto page seg, so we have to run it here.
191 // By page segmentation mode there is no non-text to avoid running on.
192 to_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);
193 }
194 } else if (!PSM_SPARSE(pageseg_mode)) {
195 // AutoPageSeg does not need to find_components as it did that already.
196 // Filter_blobs sets up the TO_BLOCKs the same as find_components does.
197 filter_blobs(page_tr_, to_blocks, true);
198 }
199
200 ASSERT_HOST(!to_blocks->empty());
201 if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) {
202 const FCOORD anticlockwise90(0.0f, 1.0f);
203 const FCOORD clockwise90(0.0f, -1.0f);
204 TO_BLOCK_IT it(to_blocks);
205 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
206 TO_BLOCK *to_block = it.data();
207 BLOCK *block = to_block->block;
208 // Create a fake poly_block in block from its bounding box.
209 block->pdblk.set_poly_block(new POLY_BLOCK(block->pdblk.bounding_box(), PT_VERTICAL_TEXT));
210 // Rotate the to_block along with its contained block and blobnbox lists.
211 to_block->rotate(anticlockwise90);
212 // Set the block's rotation values to obey the convention followed in
213 // layout analysis for vertical text.
214 block->set_re_rotation(clockwise90);
215 block->set_classify_rotation(clockwise90);
216 }
217 }
218
219 TO_BLOCK_IT to_block_it(to_blocks);
220 TO_BLOCK *to_block = to_block_it.data();
221 // Make the rows in the block.
222 float gradient;
223 // Do it the old fashioned way.
224 if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {
225 gradient = make_rows(page_tr_, to_blocks);
226 } else if (!PSM_SPARSE(pageseg_mode)) {
227 // RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
228 gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE, to_block, to_blocks);
229 } else {
230 gradient = 0.0f;
231 }
232 BaselineDetect baseline_detector(textord_baseline_debug, reskew, to_blocks);
233 baseline_detector.ComputeStraightBaselines(use_box_bottoms);
234 baseline_detector.ComputeBaselineSplinesAndXheights(
235 page_tr_, pageseg_mode != PSM_RAW_LINE, textord_heavy_nr, textord_show_final_rows, this);
236 // Now make the words in the lines.
237 if (PSM_WORD_FIND_ENABLED(pageseg_mode)) {
238 // SINGLE_LINE uses the old word maker on the single line.
239 make_words(this, page_tr_, gradient, blocks, to_blocks);
240 } else {
241 // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
242 // single word, and in SINGLE_CHAR mode, all the outlines
243 // go in a single blob.
244 TO_BLOCK *to_block = to_block_it.data();
245 make_single_word(pageseg_mode == PSM_SINGLE_CHAR, to_block->get_rows(),
246 to_block->block->row_list());
247 }
248 // Remove empties.
249 cleanup_blocks(PSM_WORD_FIND_ENABLED(pageseg_mode), blocks);
250 TransferDiacriticsToBlockGroups(diacritic_blobs, blocks);
251 // Compute the margins for each row in the block, to be used later for
252 // paragraph detection.
253 BLOCK_IT b_it(blocks);
254 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
255 b_it.data()->compute_row_margins();
256 }
257 #ifndef GRAPHICS_DISABLED
258 close_to_win();
259 #endif
260 }
261
262 // If we were supposed to return only a single textline, and there is more
263 // than one, clean up and leave only the best.
CleanupSingleRowResult(PageSegMode pageseg_mode,PAGE_RES * page_res)264 void Textord::CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES *page_res) {
265 if (PSM_LINE_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode)) {
266 return; // No cleanup required.
267 }
268 PAGE_RES_IT it(page_res);
269 // Find the best row, being the greatest mean word conf.
270 float row_total_conf = 0.0f;
271 int row_word_count = 0;
272 ROW_RES *best_row = nullptr;
273 float best_conf = 0.0f;
274 for (it.restart_page(); it.word() != nullptr; it.forward()) {
275 WERD_RES *word = it.word();
276 row_total_conf += word->best_choice->certainty();
277 ++row_word_count;
278 if (it.next_row() != it.row()) {
279 row_total_conf /= row_word_count;
280 if (best_row == nullptr || best_conf < row_total_conf) {
281 best_row = it.row();
282 best_conf = row_total_conf;
283 }
284 row_total_conf = 0.0f;
285 row_word_count = 0;
286 }
287 }
288 // Now eliminate any word not in the best row.
289 for (it.restart_page(); it.word() != nullptr; it.forward()) {
290 if (it.row() != best_row) {
291 it.DeleteCurrentWord();
292 }
293 }
294 }
295
296 } // namespace tesseract.
297