1 /////////////////////////////////////////////////////////////////////// 2 // File: tesseractclass.h 3 // Description: The Tesseract class. It holds/owns everything needed 4 // to run Tesseract on a single language, and also a set of 5 // sub-Tesseracts to run sub-languages. For thread safety, *every* 6 // global variable goes in here, directly, or indirectly. 7 // This makes it safe to run multiple Tesseracts in different 8 // threads in parallel, and keeps the different language 9 // instances separate. 10 // Author: Ray Smith 11 // 12 // (C) Copyright 2008, Google Inc. 13 // Licensed under the Apache License, Version 2.0 (the "License"); 14 // you may not use this file except in compliance with the License. 15 // You may obtain a copy of the License at 16 // http://www.apache.org/licenses/LICENSE-2.0 17 // Unless required by applicable law or agreed to in writing, software 18 // distributed under the License is distributed on an "AS IS" BASIS, 19 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20 // See the License for the specific language governing permissions and 21 // limitations under the License. 22 // 23 /////////////////////////////////////////////////////////////////////// 24 25 #ifndef TESSERACT_CCMAIN_TESSERACTCLASS_H_ 26 #define TESSERACT_CCMAIN_TESSERACTCLASS_H_ 27 28 #ifdef HAVE_CONFIG_H 29 # include "config_auto.h" // DISABLED_LEGACY_ENGINE 30 #endif 31 32 #include "control.h" // for ACCEPTABLE_WERD_TYPE 33 #include "debugpixa.h" // for DebugPixa 34 #include "devanagari_processing.h" // for ShiroRekhaSplitter 35 #ifndef DISABLED_LEGACY_ENGINE 36 # include "docqual.h" // for GARBAGE_LEVEL 37 #endif 38 #include "genericvector.h" // for PointerVector 39 #include "pageres.h" // for WERD_RES (ptr only), PAGE_RES (pt... 
40 #include "params.h" // for BOOL_VAR_H, BoolParam, DoubleParam 41 #include "points.h" // for FCOORD 42 #include "ratngs.h" // for ScriptPos, WERD_CHOICE (ptr only) 43 #include "tessdatamanager.h" // for TessdataManager 44 #include "textord.h" // for Textord 45 #include "wordrec.h" // for Wordrec 46 47 #include <tesseract/publictypes.h> // for OcrEngineMode, PageSegMode, OEM_L... 48 #include <tesseract/unichar.h> // for UNICHAR_ID 49 50 #include <allheaders.h> // for pixDestroy, pixGetWidth, pixGetHe... 51 52 #include <cstdint> // for int16_t, int32_t, uint16_t 53 #include <cstdio> // for FILE 54 55 namespace tesseract { 56 57 class BLOCK_LIST; 58 class ETEXT_DESC; 59 struct OSResults; 60 class PAGE_RES; 61 class PAGE_RES_IT; 62 class ROW; 63 class SVMenuNode; 64 class TBOX; 65 class TO_BLOCK_LIST; 66 class WERD; 67 class WERD_CHOICE; 68 class WERD_RES; 69 70 class ColumnFinder; 71 class DocumentData; 72 #ifndef DISABLED_LEGACY_ENGINE 73 class EquationDetect; 74 #endif // ndef DISABLED_LEGACY_ENGINE 75 class ImageData; 76 class LSTMRecognizer; 77 class Tesseract; 78 79 // Top-level class for all tesseract global instance data. 80 // This class either holds or points to all data used by an instance 81 // of Tesseract, including the memory allocator. When this is 82 // complete, Tesseract will be thread-safe. UNTIL THEN, IT IS NOT! 83 // 84 // NOTE to developers: Do not create cyclic dependencies through this class! 85 // The directory dependency tree must remain a tree! To keep this clean, 86 // lower-level code (eg in ccutil, the bottom level) must never need to 87 // know about the content of a higher-level directory. 
// The following scheme will grant the easiest access to lower-level
// global members without creating a cyclic dependency:
//
// Class Hierarchy (^ = inheritance):
//
//   CCUtil (ccutil/ccutil.h)
//     ^ Members include: UNICHARSET
//   CCStruct (ccstruct/ccstruct.h)
//     ^ Members include: Image
//   Classify (classify/classify.h)
//     ^ Members include: Dict
//   Wordrec (wordrec/wordrec.h)
//     ^ Members include: WERD*, DENORM*
//   Tesseract (ccmain/tesseractclass.h)
//       Members include: Pix*
//
// Other important classes:
//
//   TessBaseAPI (tesseract/baseapi.h)
//       Members include: BLOCK_LIST*, PAGE_RES*,
//                        Tesseract*, ImageThresholder*
//   Dict (dict/dict.h)
//       Members include: Image* (private)
//
// NOTE: each level contains members that correspond to global data that is
// defined (and used) at that level, not necessarily where the type is
// defined. So, for instance:
//   BOOL_VAR_H(textord_show_blobs);
// goes inside the Textord class, not the CCUtil class.

// A collection of various variables for statistics and debugging.
119 struct TesseractStats { TesseractStatsTesseractStats120 TesseractStats() 121 : adaption_word_number(0) 122 , doc_blob_quality(0) 123 , doc_outline_errs(0) 124 , doc_char_quality(0) 125 , good_char_count(0) 126 , doc_good_char_quality(0) 127 , word_count(0) 128 , dict_words(0) 129 , tilde_crunch_written(false) 130 , last_char_was_newline(true) 131 , last_char_was_tilde(false) 132 , write_results_empty_block(true) {} 133 134 int32_t adaption_word_number; 135 int16_t doc_blob_quality; 136 int16_t doc_outline_errs; 137 int16_t doc_char_quality; 138 int16_t good_char_count; 139 int16_t doc_good_char_quality; 140 int32_t word_count; // count of word in the document 141 int32_t dict_words; // number of dicitionary words in the document 142 std::string dump_words_str; // accumulator used by dump_words() 143 // Flags used by write_results() 144 bool tilde_crunch_written; 145 bool last_char_was_newline; 146 bool last_char_was_tilde; 147 bool write_results_empty_block; 148 }; 149 150 // Struct to hold all the pointers to relevant data for processing a word. 151 struct WordData { WordDataWordData152 WordData() : word(nullptr), row(nullptr), block(nullptr), prev_word(nullptr) {} WordDataWordData153 explicit WordData(const PAGE_RES_IT &page_res_it) 154 : word(page_res_it.word()) 155 , row(page_res_it.row()->row) 156 , block(page_res_it.block()->block) 157 , prev_word(nullptr) {} WordDataWordData158 WordData(BLOCK *block_in, ROW *row_in, WERD_RES *word_res) 159 : word(word_res), row(row_in), block(block_in), prev_word(nullptr) {} 160 161 WERD_RES *word; 162 ROW *row; 163 BLOCK *block; 164 WordData *prev_word; 165 PointerVector<WERD_RES> lang_words; 166 }; 167 168 // Definition of a Tesseract WordRecognizer. 
The WordData provides the context 169 // of row/block, in_word holds an initialized, possibly pre-classified word, 170 // that the recognizer may or may not consume (but if so it sets 171 // *in_word=nullptr) and produces one or more output words in out_words, which 172 // may be the consumed in_word, or may be generated independently. This api 173 // allows both a conventional tesseract classifier to work, or a line-level 174 // classifier that generates multiple words from a merged input. 175 using WordRecognizer = void (Tesseract::*)(const WordData &, WERD_RES **, 176 PointerVector<WERD_RES> *); 177 178 class TESS_API Tesseract : public Wordrec { 179 public: 180 Tesseract(); 181 ~Tesseract() override; 182 183 // Return appropriate dictionary 184 Dict &getDict() override; 185 186 // Clear as much used memory as possible without resetting the adaptive 187 // classifier or losing any other classifier data. 188 void Clear(); 189 // Clear all memory of adaption for this and all subclassifiers. 190 void ResetAdaptiveClassifier(); 191 // Clear the document dictionary for this and all subclassifiers. 192 void ResetDocumentDictionary(); 193 194 #ifndef DISABLED_LEGACY_ENGINE 195 // Set the equation detector. 196 void SetEquationDetect(EquationDetect *detector); 197 #endif // ndef DISABLED_LEGACY_ENGINE 198 199 // Simple accessors. reskew()200 const FCOORD &reskew() const { 201 return reskew_; 202 } 203 // Destroy any existing pix and return a pointer to the pointer. mutable_pix_binary()204 Image *mutable_pix_binary() { 205 pix_binary_.destroy(); 206 return &pix_binary_; 207 } pix_binary()208 Image pix_binary() const { 209 return pix_binary_; 210 } pix_grey()211 Image pix_grey() const { 212 return pix_grey_; 213 } set_pix_grey(Image grey_pix)214 void set_pix_grey(Image grey_pix) { 215 pix_grey_.destroy(); 216 pix_grey_ = grey_pix; 217 } pix_original()218 Image pix_original() const { 219 return pix_original_; 220 } 221 // Takes ownership of the given original_pix. 
set_pix_original(Image original_pix)222 void set_pix_original(Image original_pix) { 223 pix_original_.destroy(); 224 pix_original_ = original_pix; 225 // Clone to sublangs as well. 226 for (auto &lang : sub_langs_) { 227 lang->set_pix_original(original_pix ? original_pix.clone() : nullptr); 228 } 229 } 230 // Returns a pointer to a Pix representing the best available resolution image 231 // of the page, with best available bit depth as second priority. Result can 232 // be of any bit depth, but never color-mapped, as that has always been 233 // removed. Note that in grey and color, 0 is black and 255 is 234 // white. If the input was binary, then black is 1 and white is 0. 235 // To tell the difference pixGetDepth() will return 32, 8 or 1. 236 // In any case, the return value is a borrowed Pix, and should not be 237 // deleted or pixDestroyed. BestPix()238 Image BestPix() const { 239 if (pixGetWidth(pix_original_) == ImageWidth()) { 240 return pix_original_; 241 } else if (pix_grey_ != nullptr) { 242 return pix_grey_; 243 } else { 244 return pix_binary_; 245 } 246 } set_pix_thresholds(Image thresholds)247 void set_pix_thresholds(Image thresholds) { 248 pix_thresholds_.destroy(); 249 pix_thresholds_ = thresholds; 250 } source_resolution()251 int source_resolution() const { 252 return source_resolution_; 253 } set_source_resolution(int ppi)254 void set_source_resolution(int ppi) { 255 source_resolution_ = ppi; 256 } ImageWidth()257 int ImageWidth() const { 258 return pixGetWidth(pix_binary_); 259 } ImageHeight()260 int ImageHeight() const { 261 return pixGetHeight(pix_binary_); 262 } scaled_color()263 Image scaled_color() const { 264 return scaled_color_; 265 } scaled_factor()266 int scaled_factor() const { 267 return scaled_factor_; 268 } SetScaledColor(int factor,Image color)269 void SetScaledColor(int factor, Image color) { 270 scaled_factor_ = factor; 271 scaled_color_ = color; 272 } textord()273 const Textord &textord() const { 274 return textord_; 275 } 
mutable_textord()276 Textord *mutable_textord() { 277 return &textord_; 278 } 279 right_to_left()280 bool right_to_left() const { 281 return right_to_left_; 282 } num_sub_langs()283 int num_sub_langs() const { 284 return sub_langs_.size(); 285 } get_sub_lang(int index)286 Tesseract *get_sub_lang(int index) const { 287 return sub_langs_[index]; 288 } 289 // Returns true if any language uses Tesseract (as opposed to LSTM). AnyTessLang()290 bool AnyTessLang() const { 291 if (tessedit_ocr_engine_mode != OEM_LSTM_ONLY) { 292 return true; 293 } 294 for (auto &lang : sub_langs_) { 295 if (lang->tessedit_ocr_engine_mode != OEM_LSTM_ONLY) { 296 return true; 297 } 298 } 299 return false; 300 } 301 // Returns true if any language uses the LSTM. AnyLSTMLang()302 bool AnyLSTMLang() const { 303 if (tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY) { 304 return true; 305 } 306 for (auto &lang : sub_langs_) { 307 if (lang->tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY) { 308 return true; 309 } 310 } 311 return false; 312 } 313 314 void SetBlackAndWhitelist(); 315 316 // Perform steps to prepare underlying binary image/other data structures for 317 // page segmentation. Uses the strategy specified in the global variable 318 // pageseg_devanagari_split_strategy for perform splitting while preparing for 319 // page segmentation. 320 void PrepareForPageseg(); 321 322 // Perform steps to prepare underlying binary image/other data structures for 323 // Tesseract OCR. The current segmentation is required by this method. 324 // Uses the strategy specified in the global variable 325 // ocr_devanagari_split_strategy for performing splitting while preparing for 326 // Tesseract ocr. 
327 void PrepareForTessOCR(BLOCK_LIST *block_list, Tesseract *osd_tess, OSResults *osr); 328 329 int SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr); 330 void SetupWordScripts(BLOCK_LIST *blocks); 331 int AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks, 332 BLOBNBOX_LIST *diacritic_blobs, Tesseract *osd_tess, OSResults *osr); 333 ColumnFinder *SetupPageSegAndDetectOrientation(PageSegMode pageseg_mode, BLOCK_LIST *blocks, 334 Tesseract *osd_tess, OSResults *osr, 335 TO_BLOCK_LIST *to_blocks, Image *photo_mask_pix, 336 Image *music_mask_pix); 337 // par_control.cpp 338 void PrerecAllWordsPar(const std::vector<WordData> &words); 339 340 //// linerec.cpp 341 // Generates training data for training a line recognizer, eg LSTM. 342 // Breaks the page into lines, according to the boxes, and writes them to a 343 // serialized DocumentData based on output_basename. 344 // Return true if successful, false if an error occurred. 345 bool TrainLineRecognizer(const char *input_imagename, const std::string &output_basename, 346 BLOCK_LIST *block_list); 347 // Generates training data for training a line recognizer, eg LSTM. 348 // Breaks the boxes into lines, normalizes them, converts to ImageData and 349 // appends them to the given training_data. 350 void TrainFromBoxes(const std::vector<TBOX> &boxes, const std::vector<std::string> &texts, 351 BLOCK_LIST *block_list, DocumentData *training_data); 352 353 // Returns an Imagedata containing the image of the given textline, 354 // and ground truth boxes/truth text if available in the input. 355 // The image is not normalized in any way. 
356 ImageData *GetLineData(const TBOX &line_box, const std::vector<TBOX> &boxes, 357 const std::vector<std::string> &texts, int start_box, int end_box, 358 const BLOCK &block); 359 // Helper gets the image of a rectangle, using the block.re_rotation() if 360 // needed to get to the image, and rotating the result back to horizontal 361 // layout. (CJK characters will be on their left sides) The vertical text flag 362 // is set in the returned ImageData if the text was originally vertical, which 363 // can be used to invoke a different CJK recognition engine. The revised_box 364 // is also returned to enable calculation of output bounding boxes. 365 ImageData *GetRectImage(const TBOX &box, const BLOCK &block, int padding, 366 TBOX *revised_box) const; 367 // Recognizes a word or group of words, converting to WERD_RES in *words. 368 // Analogous to classify_word_pass1, but can handle a group of words as well. 369 void LSTMRecognizeWord(const BLOCK &block, ROW *row, WERD_RES *word, 370 PointerVector<WERD_RES> *words); 371 // Apply segmentation search to the given set of words, within the constraints 372 // of the existing ratings matrix. If there is already a best_choice on a word 373 // leaves it untouched and just sets the done/accepted etc flags. 374 void SearchWords(PointerVector<WERD_RES> *words); 375 376 //// control.h ///////////////////////////////////////////////////////// 377 bool ProcessTargetWord(const TBOX &word_box, const TBOX &target_word_box, const char *word_config, 378 int pass); 379 // Sets up the words ready for whichever engine is to be run 380 void SetupAllWordsPassN(int pass_n, const TBOX *target_word_box, const char *word_config, 381 PAGE_RES *page_res, std::vector<WordData> *words); 382 // Sets up the single word ready for whichever engine is to be run. 383 void SetupWordPassN(int pass_n, WordData *word); 384 // Runs word recognition on all the words. 
385 bool RecogAllWordsPassN(int pass_n, ETEXT_DESC *monitor, PAGE_RES_IT *pr_it, 386 std::vector<WordData> *words); 387 bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, 388 const char *word_config, int dopasses); 389 void rejection_passes(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, 390 const char *word_config); 391 void bigram_correction_pass(PAGE_RES *page_res); 392 void blamer_pass(PAGE_RES *page_res); 393 // Sets script positions and detects smallcaps on all output words. 394 void script_pos_pass(PAGE_RES *page_res); 395 // Helper to recognize the word using the given (language-specific) tesseract. 396 // Returns positive if this recognizer found more new best words than the 397 // number kept from best_words. 398 int RetryWithLanguage(const WordData &word_data, WordRecognizer recognizer, bool debug, 399 WERD_RES **in_word, PointerVector<WERD_RES> *best_words); 400 // Moves good-looking "noise"/diacritics from the reject list to the main 401 // blob list on the current word. Returns true if anything was done, and 402 // sets make_next_word_fuzzy if blob(s) were added to the end of the word. 403 bool ReassignDiacritics(int pass, PAGE_RES_IT *pr_it, bool *make_next_word_fuzzy); 404 // Attempts to put noise/diacritic outlines into the blobs that they overlap. 405 // Input: a set of noisy outlines that probably belong to the real_word. 406 // Output: outlines that overlapped blobs are set to nullptr and put back into 407 // the word, either in the blobs or in the reject list. 408 void AssignDiacriticsToOverlappingBlobs(const std::vector<C_OUTLINE *> &outlines, int pass, 409 WERD *real_word, PAGE_RES_IT *pr_it, 410 std::vector<bool> *word_wanted, 411 std::vector<bool> *overlapped_any_blob, 412 std::vector<C_BLOB *> *target_blobs); 413 // Attempts to assign non-overlapping outlines to their nearest blobs or 414 // make new blobs out of them. 
415 void AssignDiacriticsToNewBlobs(const std::vector<C_OUTLINE *> &outlines, int pass, 416 WERD *real_word, PAGE_RES_IT *pr_it, 417 std::vector<bool> *word_wanted, 418 std::vector<C_BLOB *> *target_blobs); 419 // Starting with ok_outlines set to indicate which outlines overlap the blob, 420 // chooses the optimal set (approximately) and returns true if any outlines 421 // are desired, in which case ok_outlines indicates which ones. 422 bool SelectGoodDiacriticOutlines(int pass, float certainty_threshold, PAGE_RES_IT *pr_it, 423 C_BLOB *blob, const std::vector<C_OUTLINE *> &outlines, 424 int num_outlines, std::vector<bool> *ok_outlines); 425 // Classifies the given blob plus the outlines flagged by ok_outlines, undoes 426 // the inclusion of the outlines, and returns the certainty of the raw choice. 427 float ClassifyBlobPlusOutlines(const std::vector<bool> &ok_outlines, 428 const std::vector<C_OUTLINE *> &outlines, int pass_n, 429 PAGE_RES_IT *pr_it, C_BLOB *blob, std::string &best_str); 430 // Classifies the given blob (part of word_data->word->word) as an individual 431 // word, using languages, chopper etc, returning only the certainty of the 432 // best raw choice, and undoing all the work done to fake out the word. 
433 float ClassifyBlobAsWord(int pass_n, PAGE_RES_IT *pr_it, C_BLOB *blob, std::string &best_str, 434 float *c2); 435 void classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordData *word_data); 436 void classify_word_pass1(const WordData &word_data, WERD_RES **in_word, 437 PointerVector<WERD_RES> *out_words); 438 void recog_pseudo_word(PAGE_RES *page_res, // blocks to check 439 TBOX &selection_box); 440 441 void fix_rep_char(PAGE_RES_IT *page_res_it); 442 443 ACCEPTABLE_WERD_TYPE acceptable_word_string(const UNICHARSET &char_set, const char *s, 444 const char *lengths); 445 void match_word_pass_n(int pass_n, WERD_RES *word, ROW *row, BLOCK *block); 446 void classify_word_pass2(const WordData &word_data, WERD_RES **in_word, 447 PointerVector<WERD_RES> *out_words); 448 void ReportXhtFixResult(bool accept_new_word, float new_x_ht, WERD_RES *word, WERD_RES *new_word); 449 bool RunOldFixXht(WERD_RES *word, BLOCK *block, ROW *row); 450 bool TrainedXheightFix(WERD_RES *word, BLOCK *block, ROW *row); 451 // Runs recognition with the test baseline shift and x-height and returns true 452 // if there was an improvement in recognition result. 453 bool TestNewNormalization(int original_misfits, float baseline_shift, float new_x_ht, 454 WERD_RES *word, BLOCK *block, ROW *row); 455 bool recog_interactive(PAGE_RES_IT *pr_it); 456 457 // Set fonts of this word. 
458 void set_word_fonts(WERD_RES *word); 459 void font_recognition_pass(PAGE_RES *page_res); 460 void dictionary_correction_pass(PAGE_RES *page_res); 461 bool check_debug_pt(WERD_RES *word, int location); 462 463 //// superscript.cpp //////////////////////////////////////////////////// 464 bool SubAndSuperscriptFix(WERD_RES *word_res); 465 void GetSubAndSuperscriptCandidates(const WERD_RES *word, int *num_rebuilt_leading, 466 ScriptPos *leading_pos, float *leading_certainty, 467 int *num_rebuilt_trailing, ScriptPos *trailing_pos, 468 float *trailing_certainty, float *avg_certainty, 469 float *unlikely_threshold); 470 WERD_RES *TrySuperscriptSplits(int num_chopped_leading, float leading_certainty, 471 ScriptPos leading_pos, int num_chopped_trailing, 472 float trailing_certainty, ScriptPos trailing_pos, WERD_RES *word, 473 bool *is_good, int *retry_leading, int *retry_trailing); 474 bool BelievableSuperscript(bool debug, const WERD_RES &word, float certainty_threshold, 475 int *left_ok, int *right_ok) const; 476 477 //// output.h ////////////////////////////////////////////////////////// 478 479 void output_pass(PAGE_RES_IT &page_res_it, const TBOX *target_word_box); 480 void write_results(PAGE_RES_IT &page_res_it, // full info 481 char newline_type, // type of newline 482 bool force_eol // override tilde crunch? 483 ); 484 void set_unlv_suspects(WERD_RES *word); 485 UNICHAR_ID get_rep_char(WERD_RES *word); // what char is repeated? 486 bool acceptable_number_string(const char *s, const char *lengths); 487 int16_t count_alphanums(const WERD_CHOICE &word); 488 int16_t count_alphas(const WERD_CHOICE &word); 489 490 void read_config_file(const char *filename, SetParamConstraint constraint); 491 // Initialize for potentially a set of languages defined by the language 492 // string and recursively any additional languages required by any language 493 // traineddata file (via tessedit_load_sublangs in its config) that is loaded. 494 // See init_tesseract_internal for args. 
495 int init_tesseract(const std::string &arg0, const std::string &textbase, 496 const std::string &language, OcrEngineMode oem, char **configs, 497 int configs_size, const std::vector<std::string> *vars_vec, 498 const std::vector<std::string> *vars_values, bool set_only_non_debug_params, 499 TessdataManager *mgr); init_tesseract(const std::string & datapath,const std::string & language,OcrEngineMode oem)500 int init_tesseract(const std::string &datapath, const std::string &language, OcrEngineMode oem) { 501 TessdataManager mgr; 502 return init_tesseract(datapath, {}, language, oem, nullptr, 0, nullptr, nullptr, false, &mgr); 503 } 504 // Common initialization for a single language. 505 // arg0 is the datapath for the tessdata directory, which could be the 506 // path of the tessdata directory with no trailing /, or (if tessdata 507 // lives in the same directory as the executable, the path of the executable, 508 // hence the name arg0. 509 // textbase is an optional output file basename (used only for training) 510 // language is the language code to load. 511 // oem controls which engine(s) will operate on the image 512 // configs (argv) is an array of config filenames to load variables from. 513 // May be nullptr. 514 // configs_size (argc) is the number of elements in configs. 515 // vars_vec is an optional vector of variables to set. 516 // vars_values is an optional corresponding vector of values for the variables 517 // in vars_vec. 518 // If set_only_non_debug_params is true, only params that do not contain 519 // "debug" in the name will be set. 
520 int init_tesseract_internal(const std::string &arg0, const std::string &textbase, 521 const std::string &language, OcrEngineMode oem, char **configs, 522 int configs_size, const std::vector<std::string> *vars_vec, 523 const std::vector<std::string> *vars_values, 524 bool set_only_non_debug_params, TessdataManager *mgr); 525 526 // Set the universal_id member of each font to be unique among all 527 // instances of the same font loaded. 528 void SetupUniversalFontIds(); 529 530 void recognize_page(std::string &image_name); 531 void end_tesseract(); 532 533 bool init_tesseract_lang_data(const std::string &arg0, 534 const std::string &language, OcrEngineMode oem, char **configs, 535 int configs_size, const std::vector<std::string> *vars_vec, 536 const std::vector<std::string> *vars_values, 537 bool set_only_non_debug_params, TessdataManager *mgr); 538 539 void ParseLanguageString(const std::string &lang_str, std::vector<std::string> *to_load, 540 std::vector<std::string> *not_to_load); 541 542 //// pgedit.h ////////////////////////////////////////////////////////// 543 SVMenuNode *build_menu_new(); 544 #ifndef GRAPHICS_DISABLED 545 void pgeditor_main(int width, int height, PAGE_RES *page_res); 546 547 void process_image_event( // action in image win 548 const SVEvent &event); 549 bool process_cmd_win_event( // UI command semantics 550 int32_t cmd_event, // which menu item? 
551 char *new_value // any prompt data 552 ); 553 #endif // !GRAPHICS_DISABLED 554 void debug_word(PAGE_RES *page_res, const TBOX &selection_box); 555 void do_re_display(bool (tesseract::Tesseract::*word_painter)(PAGE_RES_IT *pr_it)); 556 bool word_display(PAGE_RES_IT *pr_it); 557 bool word_bln_display(PAGE_RES_IT *pr_it); 558 bool word_blank_and_set_display(PAGE_RES_IT *pr_its); 559 bool word_set_display(PAGE_RES_IT *pr_it); 560 // #ifndef GRAPHICS_DISABLED 561 bool word_dumper(PAGE_RES_IT *pr_it); 562 // #endif // !GRAPHICS_DISABLED 563 void blob_feature_display(PAGE_RES *page_res, const TBOX &selection_box); 564 //// reject.h ////////////////////////////////////////////////////////// 565 // make rej map for word 566 void make_reject_map(WERD_RES *word, ROW *row, int16_t pass); 567 bool one_ell_conflict(WERD_RES *word_res, bool update_map); 568 int16_t first_alphanum_index(const char *word, const char *word_lengths); 569 int16_t first_alphanum_offset(const char *word, const char *word_lengths); 570 int16_t alpha_count(const char *word, const char *word_lengths); 571 bool word_contains_non_1_digit(const char *word, const char *word_lengths); 572 void dont_allow_1Il(WERD_RES *word); 573 int16_t count_alphanums( // how many alphanums 574 WERD_RES *word); 575 void flip_0O(WERD_RES *word); 576 bool non_0_digit(const UNICHARSET &ch_set, UNICHAR_ID unichar_id); 577 bool non_O_upper(const UNICHARSET &ch_set, UNICHAR_ID unichar_id); 578 bool repeated_nonalphanum_wd(WERD_RES *word, ROW *row); 579 void nn_match_word( // Match a word 580 WERD_RES *word, ROW *row); 581 void nn_recover_rejects(WERD_RES *word, ROW *row); 582 void set_done( // set done flag 583 WERD_RES *word, int16_t pass); 584 int16_t safe_dict_word(const WERD_RES *werd_res); // is best_choice in dict? 
585 void flip_hyphens(WERD_RES *word); 586 void reject_I_1_L(WERD_RES *word); 587 void reject_edge_blobs(WERD_RES *word); 588 void reject_mostly_rejects(WERD_RES *word); 589 //// adaptions.h /////////////////////////////////////////////////////// 590 bool word_adaptable( // should we adapt? 591 WERD_RES *word, uint16_t mode); 592 593 //// tfacepp.cpp /////////////////////////////////////////////////////// 594 void recog_word_recursive(WERD_RES *word); 595 void recog_word(WERD_RES *word); 596 void split_and_recog_word(WERD_RES *word); 597 void split_word(WERD_RES *word, unsigned split_pt, WERD_RES **right_piece, 598 BlamerBundle **orig_blamer_bundle) const; 599 void join_words(WERD_RES *word, WERD_RES *word2, BlamerBundle *orig_bb) const; 600 //// fixspace.cpp /////////////////////////////////////////////////////// 601 bool digit_or_numeric_punct(WERD_RES *word, int char_position); 602 int16_t eval_word_spacing(WERD_RES_LIST &word_res_list); 603 void match_current_words(WERD_RES_LIST &words, ROW *row, BLOCK *block); 604 int16_t fp_eval_word_spacing(WERD_RES_LIST &word_res_list); 605 void fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *block); 606 void fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *block); 607 void fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK *block); 608 void fix_fuzzy_spaces( // find fuzzy words 609 ETEXT_DESC *monitor, // progress monitor 610 int32_t word_count, // count of words in doc 611 PAGE_RES *page_res); 612 void dump_words(WERD_RES_LIST &perm, int16_t score, int16_t mode, bool improved); 613 bool fixspace_thinks_word_done(WERD_RES *word); 614 int16_t worst_noise_blob(WERD_RES *word_res, float *worst_noise_score); 615 float blob_noise_score(TBLOB *blob); 616 void break_noisiest_blob_word(WERD_RES_LIST &words); 617 //// docqual.cpp //////////////////////////////////////////////////////// 618 #ifndef DISABLED_LEGACY_ENGINE 619 GARBAGE_LEVEL garbage_word(WERD_RES *word, bool ok_dict_word); 620 bool 
potential_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level, bool ok_dict_word); 621 #endif 622 void tilde_crunch(PAGE_RES_IT &page_res_it); 623 void unrej_good_quality_words( // unreject potential 624 PAGE_RES_IT &page_res_it); 625 void doc_and_block_rejection( // reject big chunks 626 PAGE_RES_IT &page_res_it, bool good_quality_doc); 627 void quality_based_rejection(PAGE_RES_IT &page_res_it, bool good_quality_doc); 628 void convert_bad_unlv_chs(WERD_RES *word_res); 629 void tilde_delete(PAGE_RES_IT &page_res_it); 630 int16_t word_blob_quality(WERD_RES *word); 631 void word_char_quality(WERD_RES *word, int16_t *match_count, int16_t *accepted_match_count); 632 void unrej_good_chs(WERD_RES *word); 633 int16_t count_outline_errs(char c, int16_t outline_count); 634 int16_t word_outline_errs(WERD_RES *word); 635 #ifndef DISABLED_LEGACY_ENGINE 636 bool terrible_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level); 637 #endif 638 CRUNCH_MODE word_deletable(WERD_RES *word, int16_t &delete_mode); 639 int16_t failure_count(WERD_RES *word); 640 bool noise_outlines(TWERD *word); 641 //// pagewalk.cpp /////////////////////////////////////////////////////// 642 void process_selected_words(PAGE_RES *page_res, // blocks to check 643 // function to call 644 TBOX &selection_box, 645 bool (tesseract::Tesseract::*word_processor)(PAGE_RES_IT *pr_it)); 646 //// tessbox.cpp /////////////////////////////////////////////////////// 647 void tess_add_doc_word( // test acceptability 648 WERD_CHOICE *word_choice // after context 649 ); 650 void tess_segment_pass_n(int pass_n, WERD_RES *word); 651 bool tess_acceptable_word(WERD_RES *word); 652 653 //// applybox.cpp ////////////////////////////////////////////////////// 654 // Applies the box file based on the image name filename, and resegments 655 // the words in the block_list (page), with: 656 // blob-mode: one blob per line in the box file, words as input. 
657 // word/line-mode: one blob per space-delimited unit after the #, and one word 658 // per line in the box file. (See comment above for box file format.) 659 // If find_segmentation is true, (word/line mode) then the classifier is used 660 // to re-segment words/lines to match the space-delimited truth string for 661 // each box. In this case, the input box may be for a word or even a whole 662 // text line, and the output words will contain multiple blobs corresponding 663 // to the space-delimited input string. 664 // With find_segmentation false, no classifier is needed, but the chopper 665 // can still be used to correctly segment touching characters with the help 666 // of the input boxes. 667 // In the returned PAGE_RES, the WERD_RES are setup as they would be returned 668 // from normal classification, ie. with a word, chopped_word, rebuild_word, 669 // seam_array, denorm, box_word, and best_state, but NO best_choice or 670 // raw_choice, as they would require a UNICHARSET, which we aim to avoid. 671 // Instead, the correct_text member of WERD_RES is set, and this may be later 672 // converted to a best_choice using CorrectClassifyWords. CorrectClassifyWords 673 // is not required before calling ApplyBoxTraining. 674 PAGE_RES *ApplyBoxes(const char *filename, bool find_segmentation, BLOCK_LIST *block_list); 675 676 // Any row xheight that is significantly different from the median is set 677 // to the median. 678 void PreenXHeights(BLOCK_LIST *block_list); 679 680 // Builds a PAGE_RES from the block_list in the way required for ApplyBoxes: 681 // All fuzzy spaces are removed, and all the words are maximally chopped. 682 PAGE_RES *SetupApplyBoxes(const std::vector<TBOX> &boxes, BLOCK_LIST *block_list); 683 // Tests the chopper by exhaustively running chop_one_blob. 684 // The word_res will contain filled chopped_word, seam_array, denorm, 685 // box_word and best_state for the maximally chopped word. 
686 void MaximallyChopWord(const std::vector<TBOX> &boxes, BLOCK *block, ROW *row, 687 WERD_RES *word_res); 688 // Gather consecutive blobs that match the given box into the best_state 689 // and corresponding correct_text. 690 // Fights over which box owns which blobs are settled by pre-chopping and 691 // applying the blobs to box or next_box with the least non-overlap. 692 // Returns false if the box was in error, which can only be caused by 693 // failing to find an appropriate blob for a box. 694 // This means that occasionally, blobs may be incorrectly segmented if the 695 // chopper fails to find a suitable chop point. 696 bool ResegmentCharBox(PAGE_RES *page_res, const TBOX *prev_box, const TBOX &box, 697 const TBOX *next_box, const char *correct_text); 698 // Consume all source blobs that strongly overlap the given box, 699 // putting them into a new word, with the correct_text label. 700 // Fights over which box owns which blobs are settled by 701 // applying the blobs to box or next_box with the least non-overlap. 702 // Returns false if the box was in error, which can only be caused by 703 // failing to find an overlapping blob for a box. 704 bool ResegmentWordBox(BLOCK_LIST *block_list, const TBOX &box, const TBOX *next_box, 705 const char *correct_text); 706 // Resegments the words by running the classifier in an attempt to find the 707 // correct segmentation that produces the required string. 708 void ReSegmentByClassification(PAGE_RES *page_res); 709 // Converts the space-delimited string of utf8 text to a vector of UNICHAR_ID. 710 // Returns false if an invalid UNICHAR_ID is encountered. 711 bool ConvertStringToUnichars(const char *utf8, std::vector<UNICHAR_ID> *class_ids); 712 // Resegments the word to achieve the target_text from the classifier. 713 // Returns false if the re-segmentation fails. 
714 // Uses brute-force combination of up to kMaxGroupSize adjacent blobs, and 715 // applies a full search on the classifier results to find the best classified 716 // segmentation. As a compromise to obtain better recall, 1-1 ambigiguity 717 // substitutions ARE used. 718 bool FindSegmentation(const std::vector<UNICHAR_ID> &target_text, WERD_RES *word_res); 719 // Recursive helper to find a match to the target_text (from text_index 720 // position) in the choices (from choices_pos position). 721 // Choices is an array of vectors of length choices_length, with each 722 // element representing a starting position in the word, and the 723 // vector holding classification results for a sequence of consecutive 724 // blobs, with index 0 being a single blob, index 1 being 2 blobs etc. 725 void SearchForText(const std::vector<BLOB_CHOICE_LIST *> *choices, int choices_pos, 726 unsigned choices_length, const std::vector<UNICHAR_ID> &target_text, 727 unsigned text_index, float rating, std::vector<int> *segmentation, 728 float *best_rating, std::vector<int> *best_segmentation); 729 // Counts up the labelled words and the blobs within. 730 // Deletes all unused or emptied words, counting the unused ones. 731 // Resets W_BOL and W_EOL flags correctly. 732 // Builds the rebuild_word and rebuilds the box_word. 733 void TidyUp(PAGE_RES *page_res); 734 // Logs a bad box by line in the box file and box coords. 735 void ReportFailedBox(int boxfile_lineno, TBOX box, const char *box_ch, const char *err_msg); 736 // Creates a fake best_choice entry in each WERD_RES with the correct text. 737 void CorrectClassifyWords(PAGE_RES *page_res); 738 // Call LearnWord to extract features for labelled blobs within each word. 739 // Features are stored in an internal buffer. 740 void ApplyBoxTraining(const std::string &fontname, PAGE_RES *page_res); 741 742 //// fixxht.cpp /////////////////////////////////////////////////////// 743 // Returns the number of misfit blob tops in this word. 
744 int CountMisfitTops(WERD_RES *word_res); 745 // Returns a new x-height in pixels (original image coords) that is 746 // maximally compatible with the result in word_res. 747 // Returns 0.0f if no x-height is found that is better than the current 748 // estimate. 749 float ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_shift); 750 //// Data members /////////////////////////////////////////////////////// 751 // TODO(ocr-team): Find and remove obsolete parameters. 752 BOOL_VAR_H(tessedit_resegment_from_boxes); 753 BOOL_VAR_H(tessedit_resegment_from_line_boxes); 754 BOOL_VAR_H(tessedit_train_from_boxes); 755 BOOL_VAR_H(tessedit_make_boxes_from_boxes); 756 BOOL_VAR_H(tessedit_train_line_recognizer); 757 BOOL_VAR_H(tessedit_dump_pageseg_images); 758 BOOL_VAR_H(tessedit_do_invert); 759 INT_VAR_H(tessedit_pageseg_mode); 760 INT_VAR_H(thresholding_method); 761 BOOL_VAR_H(thresholding_debug); 762 double_VAR_H(thresholding_window_size); 763 double_VAR_H(thresholding_kfactor); 764 double_VAR_H(thresholding_tile_size); 765 double_VAR_H(thresholding_smooth_kernel_size); 766 double_VAR_H(thresholding_score_fraction); 767 INT_VAR_H(tessedit_ocr_engine_mode); 768 STRING_VAR_H(tessedit_char_blacklist); 769 STRING_VAR_H(tessedit_char_whitelist); 770 STRING_VAR_H(tessedit_char_unblacklist); 771 BOOL_VAR_H(tessedit_ambigs_training); 772 INT_VAR_H(pageseg_devanagari_split_strategy); 773 INT_VAR_H(ocr_devanagari_split_strategy); 774 STRING_VAR_H(tessedit_write_params_to_file); 775 BOOL_VAR_H(tessedit_adaption_debug); 776 INT_VAR_H(bidi_debug); 777 INT_VAR_H(applybox_debug); 778 INT_VAR_H(applybox_page); 779 STRING_VAR_H(applybox_exposure_pattern); 780 BOOL_VAR_H(applybox_learn_chars_and_char_frags_mode); 781 BOOL_VAR_H(applybox_learn_ngrams_mode); 782 BOOL_VAR_H(tessedit_display_outwords); 783 BOOL_VAR_H(tessedit_dump_choices); 784 BOOL_VAR_H(tessedit_timing_debug); 785 BOOL_VAR_H(tessedit_fix_fuzzy_spaces); 786 BOOL_VAR_H(tessedit_unrej_any_wd); 787 
BOOL_VAR_H(tessedit_fix_hyphens);
  BOOL_VAR_H(tessedit_enable_doc_dict);
  BOOL_VAR_H(tessedit_debug_fonts);
  INT_VAR_H(tessedit_font_id);
  BOOL_VAR_H(tessedit_debug_block_rejection);
  BOOL_VAR_H(tessedit_enable_bigram_correction);
  BOOL_VAR_H(tessedit_enable_dict_correction);
  INT_VAR_H(tessedit_bigram_debug);
  BOOL_VAR_H(enable_noise_removal);
  INT_VAR_H(debug_noise_removal);
  // Worst (min) certainty, for which a diacritic is allowed to make the base
  // character worse and still be included.
  double_VAR_H(noise_cert_basechar);
  // Worst (min) certainty, for which a non-overlapping diacritic is allowed to
  // make the base character worse and still be included.
  double_VAR_H(noise_cert_disjoint);
  // Worst (min) certainty, for which a diacritic is allowed to make a new
  // stand-alone blob.
  double_VAR_H(noise_cert_punc);
  // Factor of certainty margin for adding diacritics to not count as worse.
  double_VAR_H(noise_cert_factor);
  INT_VAR_H(noise_maxperblob);
  INT_VAR_H(noise_maxperword);
  INT_VAR_H(debug_x_ht_level);
  STRING_VAR_H(chs_leading_punct);
  STRING_VAR_H(chs_trailing_punct1);
  STRING_VAR_H(chs_trailing_punct2);
  double_VAR_H(quality_rej_pc);
  double_VAR_H(quality_blob_pc);
  double_VAR_H(quality_outline_pc);
  double_VAR_H(quality_char_pc);
  INT_VAR_H(quality_min_initial_alphas_reqd);
  INT_VAR_H(tessedit_tess_adaption_mode);
  BOOL_VAR_H(tessedit_minimal_rej_pass1);
  BOOL_VAR_H(tessedit_test_adaption);
  BOOL_VAR_H(test_pt);
  double_VAR_H(test_pt_x);
  double_VAR_H(test_pt_y);
  INT_VAR_H(multilang_debug_level);
  INT_VAR_H(paragraph_debug_level);
  BOOL_VAR_H(paragraph_text_based);
  BOOL_VAR_H(lstm_use_matrix);
  STRING_VAR_H(outlines_odd);
  STRING_VAR_H(outlines_2);
  BOOL_VAR_H(tessedit_good_quality_unrej);
  BOOL_VAR_H(tessedit_use_reject_spaces);
  double_VAR_H(tessedit_reject_doc_percent);
double_VAR_H(tessedit_reject_block_percent);
  double_VAR_H(tessedit_reject_row_percent);
  double_VAR_H(tessedit_whole_wd_rej_row_percent);
  BOOL_VAR_H(tessedit_preserve_blk_rej_perfect_wds);
  BOOL_VAR_H(tessedit_preserve_row_rej_perfect_wds);
  BOOL_VAR_H(tessedit_dont_blkrej_good_wds);
  BOOL_VAR_H(tessedit_dont_rowrej_good_wds);
  INT_VAR_H(tessedit_preserve_min_wd_len);
  BOOL_VAR_H(tessedit_row_rej_good_docs);
  double_VAR_H(tessedit_good_doc_still_rowrej_wd);
  BOOL_VAR_H(tessedit_reject_bad_qual_wds);
  BOOL_VAR_H(tessedit_debug_doc_rejection);
  BOOL_VAR_H(tessedit_debug_quality_metrics);
  BOOL_VAR_H(bland_unrej);
  double_VAR_H(quality_rowrej_pc);
  BOOL_VAR_H(unlv_tilde_crunching);
  BOOL_VAR_H(hocr_font_info);
  BOOL_VAR_H(hocr_char_boxes);
  BOOL_VAR_H(crunch_early_merge_tess_fails);
  BOOL_VAR_H(crunch_early_convert_bad_unlv_chs);
  double_VAR_H(crunch_terrible_rating);
  BOOL_VAR_H(crunch_terrible_garbage);
  double_VAR_H(crunch_poor_garbage_cert);
  double_VAR_H(crunch_poor_garbage_rate);
  double_VAR_H(crunch_pot_poor_rate);
  double_VAR_H(crunch_pot_poor_cert);
  double_VAR_H(crunch_del_rating);
  double_VAR_H(crunch_del_cert);
  double_VAR_H(crunch_del_min_ht);
  double_VAR_H(crunch_del_max_ht);
  double_VAR_H(crunch_del_min_width);
  double_VAR_H(crunch_del_high_word);
  double_VAR_H(crunch_del_low_word);
  double_VAR_H(crunch_small_outlines_size);
  INT_VAR_H(crunch_rating_max);
  INT_VAR_H(crunch_pot_indicators);
  BOOL_VAR_H(crunch_leave_ok_strings);
  BOOL_VAR_H(crunch_accept_ok);
  BOOL_VAR_H(crunch_leave_accept_strings);
  BOOL_VAR_H(crunch_include_numerals);
  INT_VAR_H(crunch_leave_lc_strings);
  INT_VAR_H(crunch_leave_uc_strings);
  INT_VAR_H(crunch_long_repetitions);
  INT_VAR_H(crunch_debug);
  INT_VAR_H(fixsp_non_noise_limit);
  double_VAR_H(fixsp_small_outlines_size);
  BOOL_VAR_H(tessedit_prefer_joined_punct);
INT_VAR_H(fixsp_done_mode); 882 INT_VAR_H(debug_fix_space_level); 883 STRING_VAR_H(numeric_punctuation); 884 INT_VAR_H(x_ht_acceptance_tolerance); 885 INT_VAR_H(x_ht_min_change); 886 INT_VAR_H(superscript_debug); 887 double_VAR_H(superscript_worse_certainty); 888 double_VAR_H(superscript_bettered_certainty); 889 double_VAR_H(superscript_scaledown_ratio); 890 double_VAR_H(subscript_max_y_top); 891 double_VAR_H(superscript_min_y_bottom); 892 BOOL_VAR_H(tessedit_write_block_separators); 893 BOOL_VAR_H(tessedit_write_rep_codes); 894 BOOL_VAR_H(tessedit_write_unlv); 895 BOOL_VAR_H(tessedit_create_txt); 896 BOOL_VAR_H(tessedit_create_hocr); 897 BOOL_VAR_H(tessedit_create_alto); 898 BOOL_VAR_H(tessedit_create_lstmbox); 899 BOOL_VAR_H(tessedit_create_tsv); 900 BOOL_VAR_H(tessedit_create_wordstrbox); 901 BOOL_VAR_H(tessedit_create_pdf); 902 BOOL_VAR_H(textonly_pdf); 903 INT_VAR_H(jpg_quality); 904 INT_VAR_H(user_defined_dpi); 905 INT_VAR_H(min_characters_to_try); 906 STRING_VAR_H(unrecognised_char); 907 INT_VAR_H(suspect_level); 908 INT_VAR_H(suspect_short_words); 909 BOOL_VAR_H(suspect_constrain_1Il); 910 double_VAR_H(suspect_rating_per_ch); 911 double_VAR_H(suspect_accept_rating); 912 BOOL_VAR_H(tessedit_minimal_rejection); 913 BOOL_VAR_H(tessedit_zero_rejection); 914 BOOL_VAR_H(tessedit_word_for_word); 915 BOOL_VAR_H(tessedit_zero_kelvin_rejection); 916 INT_VAR_H(tessedit_reject_mode); 917 BOOL_VAR_H(tessedit_rejection_debug); 918 BOOL_VAR_H(tessedit_flip_0O); 919 double_VAR_H(tessedit_lower_flip_hyphen); 920 double_VAR_H(tessedit_upper_flip_hyphen); 921 BOOL_VAR_H(rej_trust_doc_dawg); 922 BOOL_VAR_H(rej_1Il_use_dict_word); 923 BOOL_VAR_H(rej_1Il_trust_permuter_type); 924 BOOL_VAR_H(rej_use_tess_accepted); 925 BOOL_VAR_H(rej_use_tess_blanks); 926 BOOL_VAR_H(rej_use_good_perm); 927 BOOL_VAR_H(rej_use_sensible_wd); 928 BOOL_VAR_H(rej_alphas_in_number_perm); 929 double_VAR_H(rej_whole_of_mostly_reject_word_fract); 930 INT_VAR_H(tessedit_image_border); 931 
STRING_VAR_H(ok_repeated_ch_non_alphanum_wds); 932 STRING_VAR_H(conflict_set_I_l_1); 933 INT_VAR_H(min_sane_x_ht_pixels); 934 BOOL_VAR_H(tessedit_create_boxfile); 935 INT_VAR_H(tessedit_page_number); 936 BOOL_VAR_H(tessedit_write_images); 937 BOOL_VAR_H(interactive_display_mode); 938 STRING_VAR_H(file_type); 939 BOOL_VAR_H(tessedit_override_permuter); 940 STRING_VAR_H(tessedit_load_sublangs); 941 BOOL_VAR_H(tessedit_use_primary_params_model); 942 // Min acceptable orientation margin (difference in scores between top and 2nd 943 // choice in OSResults::orientations) to believe the page orientation. 944 double_VAR_H(min_orientation_margin); 945 BOOL_VAR_H(textord_tabfind_show_vlines); 946 BOOL_VAR_H(textord_use_cjk_fp_model); 947 BOOL_VAR_H(poly_allow_detailed_fx); 948 BOOL_VAR_H(tessedit_init_config_only); 949 #ifndef DISABLED_LEGACY_ENGINE 950 BOOL_VAR_H(textord_equation_detect); 951 #endif // ndef DISABLED_LEGACY_ENGINE 952 BOOL_VAR_H(textord_tabfind_vertical_text); 953 BOOL_VAR_H(textord_tabfind_force_vertical_text); 954 double_VAR_H(textord_tabfind_vertical_text_ratio); 955 double_VAR_H(textord_tabfind_aligned_gap_fraction); 956 INT_VAR_H(tessedit_parallelize); 957 BOOL_VAR_H(preserve_interword_spaces); 958 STRING_VAR_H(page_separator); 959 INT_VAR_H(lstm_choice_mode); 960 INT_VAR_H(lstm_choice_iterations); 961 double_VAR_H(lstm_rating_coefficient); 962 BOOL_VAR_H(pageseg_apply_music_mask); 963 964 //// ambigsrecog.cpp ///////////////////////////////////////////////////////// 965 FILE *init_recog_training(const char *filename); 966 void recog_training_segmented(const char *filename, PAGE_RES *page_res, 967 volatile ETEXT_DESC *monitor, FILE *output_file); 968 void ambigs_classify_and_output(const char *label, PAGE_RES_IT *pr_it, FILE *output_file); 969 970 private: 971 // The filename of a backup config file. 
If not null, then we currently 972 // have a temporary debug config file loaded, and backup_config_file_ 973 // will be loaded, and set to null when debug is complete. 974 const char *backup_config_file_; 975 // The filename of a config file to read when processing a debug word. 976 std::string word_config_; 977 // Image used for input to layout analysis and tesseract recognition. 978 // May be modified by the ShiroRekhaSplitter to eliminate the top-line. 979 Image pix_binary_; 980 // Grey-level input image if the input was not binary, otherwise nullptr. 981 Image pix_grey_; 982 // Original input image. Color if the input was color. 983 Image pix_original_; 984 // Thresholds that were used to generate the thresholded image from grey. 985 Image pix_thresholds_; 986 // Debug images. If non-empty, will be written on destruction. 987 DebugPixa pixa_debug_; 988 // Input image resolution after any scaling. The resolution is not well 989 // transmitted by operations on Pix, so we keep an independent record here. 990 int source_resolution_; 991 // The shiro-rekha splitter object which is used to split top-lines in 992 // Devanagari words to provide a better word and grapheme segmentation. 993 ShiroRekhaSplitter splitter_; 994 // Page segmentation/layout 995 Textord textord_; 996 // True if the primary language uses right_to_left reading order. 997 bool right_to_left_; 998 Image scaled_color_; 999 int scaled_factor_; 1000 FCOORD deskew_; 1001 FCOORD reskew_; 1002 TesseractStats stats_; 1003 // Sub-languages to be tried in addition to this. 1004 std::vector<Tesseract *> sub_langs_; 1005 // Most recently used Tesseract out of this and sub_langs_. The default 1006 // language for the next word. 1007 Tesseract *most_recently_used_; 1008 // The size of the font table, ie max possible font id + 1. 1009 int font_table_size_; 1010 #ifndef DISABLED_LEGACY_ENGINE 1011 // Equation detector. Note: this pointer is NOT owned by the class. 
1012 EquationDetect *equ_detect_; 1013 #endif // ndef DISABLED_LEGACY_ENGINE 1014 // LSTM recognizer, if available. 1015 LSTMRecognizer *lstm_recognizer_; 1016 // Output "page" number (actually line number) using TrainLineRecognizer. 1017 int train_line_page_num_; 1018 }; 1019 1020 } // namespace tesseract 1021 1022 #endif // TESSERACT_CCMAIN_TESSERACTCLASS_H_ 1023