1 /////////////////////////////////////////////////////////////////////// 2 // File: blamer.h 3 // Description: Module allowing precise error causes to be allocated. 4 // Author: Rike Antonova 5 // Refactored: Ray Smith 6 // 7 // (C) Copyright 2013, Google Inc. 8 // Licensed under the Apache License, Version 2.0 (the "License"); 9 // you may not use this file except in compliance with the License. 10 // You may obtain a copy of the License at 11 // http://www.apache.org/licenses/LICENSE-2.0 12 // Unless required by applicable law or agreed to in writing, software 13 // distributed under the License is distributed on an "AS IS" BASIS, 14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 // See the License for the specific language governing permissions and 16 // limitations under the License. 17 // 18 /////////////////////////////////////////////////////////////////////// 19 20 #ifndef TESSERACT_CCSTRUCT_BLAMER_H_ 21 #define TESSERACT_CCSTRUCT_BLAMER_H_ 22 23 #ifdef HAVE_CONFIG_H 24 # include "config_auto.h" // DISABLED_LEGACY_ENGINE 25 #endif 26 #include "boxword.h" // for BoxWord 27 #ifndef DISABLED_LEGACY_ENGINE 28 # include "params_training_featdef.h" // for ParamsTrainingBundle, ParamsTra... 29 #endif // ndef DISABLED_LEGACY_ENGINE 30 #include "ratngs.h" // for BLOB_CHOICE_LIST (ptr only) 31 #include "rect.h" // for TBOX 32 #include "tprintf.h" // for tprintf 33 34 #include <tesseract/unichar.h> // for UNICHAR_ID 35 36 #include <cstdint> // for int16_t 37 #include <cstring> // for memcpy 38 #include <vector> // for std::vector 39 40 namespace tesseract { 41 42 class DENORM; 43 class MATRIX; 44 class UNICHARSET; 45 class WERD_RES; 46 47 struct MATRIX_COORD; 48 struct TWERD; 49 50 class LMPainPoints; 51 52 static const int16_t kBlamerBoxTolerance = 5; 53 54 // Enum for expressing the source of error. 55 // Note: Please update kIncorrectResultReasonNames when modifying this enum. 56 enum IncorrectResultReason { 57 // The text recorded in best choice == truth text 58 IRR_CORRECT, 59 // Either: Top choice is incorrect and is a dictionary word (language model 60 // is unlikely to help correct such errors, so blame the classifier). 61 // Or: the correct unichar was not included in shortlist produced by the 62 // classifier at all. 63 IRR_CLASSIFIER, 64 // Chopper have not found one or more splits that correspond to the correct 65 // character bounding boxes recorded in BlamerBundle::truth_word. 66 IRR_CHOPPER, 67 // Classifier did include correct unichars for each blob in the correct 68 // segmentation, however its rating could have been too bad to allow the 69 // language model to pull out the correct choice. On the other hand the 70 // strength of the language model might have been too weak to favor the 71 // correct answer, this we call this case a classifier-language model 72 // tradeoff error. 73 IRR_CLASS_LM_TRADEOFF, 74 // Page layout failed to produce the correct bounding box. Blame page layout 75 // if the truth was not found for the word, which implies that the bounding 76 // box of the word was incorrect (no truth word had a similar bounding box). 77 IRR_PAGE_LAYOUT, 78 // SegSearch heuristic prevented one or more blobs from the correct 79 // segmentation state to be classified (e.g. the blob was too wide). 80 IRR_SEGSEARCH_HEUR, 81 // The correct segmentaiton state was not explored because of poor SegSearch 82 // pain point prioritization. We blame SegSearch pain point prioritization 83 // if the best rating of a choice constructed from correct segmentation is 84 // better than that of the best choice (i.e. if we got to explore the correct 85 // segmentation state, language model would have picked the correct choice). 86 IRR_SEGSEARCH_PP, 87 // Same as IRR_CLASS_LM_TRADEOFF, but used when we only run chopper on a word, 88 // and thus use the old language model (permuters). 89 // TODO(antonova): integrate the new language mode with chopper 90 IRR_CLASS_OLD_LM_TRADEOFF, 91 // If there is an incorrect adaptive template match with a better score than 92 // a correct one (either pre-trained or adapted), mark this as adaption error. 93 IRR_ADAPTION, 94 // split_and_recog_word() failed to find a suitable split in truth. 95 IRR_NO_TRUTH_SPLIT, 96 // Truth is not available for this word (e.g. when words in corrected content 97 // file are turned into ~~~~ because an appropriate alignment was not found. 98 IRR_NO_TRUTH, 99 // The text recorded in best choice != truth text, but none of the above 100 // reasons are set. 101 IRR_UNKNOWN, 102 103 IRR_NUM_REASONS 104 }; 105 106 // Blamer-related information to determine the source of errors. 107 struct BlamerBundle { 108 static const char *IncorrectReasonName(IncorrectResultReason irr); BlamerBundleBlamerBundle109 BlamerBundle() 110 : truth_has_char_boxes_(false) 111 , incorrect_result_reason_(IRR_CORRECT) 112 , lattice_data_(nullptr) { 113 ClearResults(); 114 } BlamerBundleBlamerBundle115 BlamerBundle(const BlamerBundle &other) { 116 this->CopyTruth(other); 117 this->CopyResults(other); 118 } ~BlamerBundleBlamerBundle119 ~BlamerBundle() { 120 delete[] lattice_data_; 121 } 122 123 // Accessors. TruthStringBlamerBundle124 std::string TruthString() const { 125 std::string truth_str; 126 for (auto &text : truth_text_) { 127 truth_str += text; 128 } 129 return truth_str; 130 } incorrect_result_reasonBlamerBundle131 IncorrectResultReason incorrect_result_reason() const { 132 return incorrect_result_reason_; 133 } NoTruthBlamerBundle134 bool NoTruth() const { 135 return incorrect_result_reason_ == IRR_NO_TRUTH || incorrect_result_reason_ == IRR_PAGE_LAYOUT; 136 } HasDebugInfoBlamerBundle137 bool HasDebugInfo() const { 138 return debug_.length() > 0 || misadaption_debug_.length() > 0; 139 } debugBlamerBundle140 const std::string &debug() const { 141 return debug_; 142 } misadaption_debugBlamerBundle143 const std::string &misadaption_debug() const { 144 return misadaption_debug_; 145 } UpdateBestRatingBlamerBundle146 void UpdateBestRating(float rating) { 147 if (rating < best_correctly_segmented_rating_) { 148 best_correctly_segmented_rating_ = rating; 149 } 150 } correct_segmentation_lengthBlamerBundle151 int correct_segmentation_length() const { 152 return correct_segmentation_cols_.size(); 153 } 154 // Returns true if the given ratings matrix col,row position is included 155 // in the correct segmentation path at the given index. MatrixPositionCorrectBlamerBundle156 bool MatrixPositionCorrect(int index, const MATRIX_COORD &coord) { 157 return correct_segmentation_cols_[index] == coord.col && 158 correct_segmentation_rows_[index] == coord.row; 159 } set_best_choice_is_dict_and_top_choiceBlamerBundle160 void set_best_choice_is_dict_and_top_choice(bool value) { 161 best_choice_is_dict_and_top_choice_ = value; 162 } lattice_dataBlamerBundle163 const char *lattice_data() const { 164 return lattice_data_; 165 } lattice_sizeBlamerBundle166 int lattice_size() const { 167 return lattice_size_; // size of lattice_data in bytes 168 } set_lattice_dataBlamerBundle169 void set_lattice_data(const char *data, int size) { 170 lattice_size_ = size; 171 delete[] lattice_data_; 172 lattice_data_ = new char[lattice_size_]; 173 memcpy(lattice_data_, data, lattice_size_); 174 } 175 #ifndef DISABLED_LEGACY_ENGINE params_training_bundleBlamerBundle176 const tesseract::ParamsTrainingBundle ¶ms_training_bundle() const { 177 return params_training_bundle_; 178 } 179 // Adds a new ParamsTrainingHypothesis to the current hypothesis list. AddHypothesisBlamerBundle180 void AddHypothesis(const tesseract::ParamsTrainingHypothesis &hypo) { 181 params_training_bundle_.AddHypothesis(hypo); 182 } 183 #endif // ndef DISABLED_LEGACY_ENGINE 184 185 // Functions to setup the blamer. 186 // Whole word string, whole word bounding box. 187 void SetWordTruth(const UNICHARSET &unicharset, const char *truth_str, const TBOX &word_box); 188 // Single "character" string, "character" bounding box. 189 // May be called multiple times to indicate the characters in a word. 190 void SetSymbolTruth(const UNICHARSET &unicharset, const char *char_str, const TBOX &char_box); 191 // Marks that there is something wrong with the truth text, like it contains 192 // reject characters. 193 void SetRejectedTruth(); 194 195 // Returns true if the provided word_choice is correct. 196 bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const; 197 ClearResultsBlamerBundle198 void ClearResults() { 199 norm_truth_word_.DeleteAllBoxes(); 200 norm_box_tolerance_ = 0; 201 if (!NoTruth()) { 202 incorrect_result_reason_ = IRR_CORRECT; 203 } 204 debug_ = ""; 205 segsearch_is_looking_for_blame_ = false; 206 best_correctly_segmented_rating_ = WERD_CHOICE::kBadRating; 207 correct_segmentation_cols_.clear(); 208 correct_segmentation_rows_.clear(); 209 best_choice_is_dict_and_top_choice_ = false; 210 delete[] lattice_data_; 211 lattice_data_ = nullptr; 212 lattice_size_ = 0; 213 } CopyTruthBlamerBundle214 void CopyTruth(const BlamerBundle &other) { 215 truth_has_char_boxes_ = other.truth_has_char_boxes_; 216 truth_word_ = other.truth_word_; 217 truth_text_ = other.truth_text_; 218 incorrect_result_reason_ = (other.NoTruth() ? other.incorrect_result_reason_ : IRR_CORRECT); 219 } CopyResultsBlamerBundle220 void CopyResults(const BlamerBundle &other) { 221 norm_truth_word_ = other.norm_truth_word_; 222 norm_box_tolerance_ = other.norm_box_tolerance_; 223 incorrect_result_reason_ = other.incorrect_result_reason_; 224 segsearch_is_looking_for_blame_ = other.segsearch_is_looking_for_blame_; 225 best_correctly_segmented_rating_ = other.best_correctly_segmented_rating_; 226 correct_segmentation_cols_ = other.correct_segmentation_cols_; 227 correct_segmentation_rows_ = other.correct_segmentation_rows_; 228 best_choice_is_dict_and_top_choice_ = other.best_choice_is_dict_and_top_choice_; 229 if (other.lattice_data_ != nullptr) { 230 lattice_data_ = new char[other.lattice_size_]; 231 memcpy(lattice_data_, other.lattice_data_, other.lattice_size_); 232 lattice_size_ = other.lattice_size_; 233 } else { 234 lattice_data_ = nullptr; 235 } 236 } 237 const char *IncorrectReason() const; 238 239 // Appends choice and truth details to the given debug string. 240 void FillDebugString(const std::string &msg, const WERD_CHOICE *choice, std::string &debug); 241 242 // Sets up the norm_truth_word from truth_word using the given DENORM. 243 void SetupNormTruthWord(const DENORM &denorm); 244 245 // Splits *this into two pieces in bundle1 and bundle2 (preallocated, empty 246 // bundles) where the right edge/ of the left-hand word is word1_right, 247 // and the left edge of the right-hand word is word2_left. 248 void SplitBundle(int word1_right, int word2_left, bool debug, BlamerBundle *bundle1, 249 BlamerBundle *bundle2) const; 250 // "Joins" the blames from bundle1 and bundle2 into *this. 251 void JoinBlames(const BlamerBundle &bundle1, const BlamerBundle &bundle2, bool debug); 252 253 // If a blob with the same bounding box as one of the truth character 254 // bounding boxes is not classified as the corresponding truth character 255 // blames character classifier for incorrect answer. 256 void BlameClassifier(const UNICHARSET &unicharset, const TBOX &blob_box, 257 const BLOB_CHOICE_LIST &choices, bool debug); 258 259 // Checks whether chops were made at all the character bounding box 260 // boundaries in word->truth_word. If not - blames the chopper for an 261 // incorrect answer. 262 void SetChopperBlame(const WERD_RES *word, bool debug); 263 // Blames the classifier or the language model if, after running only the 264 // chopper, best_choice is incorrect and no blame has been yet set. 265 // Blames the classifier if best_choice is classifier's top choice and is a 266 // dictionary word (i.e. language model could not have helped). 267 // Otherwise, blames the language model (formerly permuter word adjustment). 268 void BlameClassifierOrLangModel(const WERD_RES *word, const UNICHARSET &unicharset, 269 bool valid_permuter, bool debug); 270 // Sets up the correct_segmentation_* to mark the correct bounding boxes. 271 void SetupCorrectSegmentation(const TWERD *word, bool debug); 272 273 // Returns true if a guided segmentation search is needed. 274 bool GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const; 275 // Setup ready to guide the segmentation search to the correct segmentation. 276 void InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *ratings, UNICHAR_ID wildcard_id, 277 bool debug, std::string &debug_str, tesseract::LMPainPoints *pain_points, 278 double max_char_wh_ratio, WERD_RES *word_res); 279 // Returns true if the guided segsearch is in progress. 280 bool GuidedSegsearchStillGoing() const; 281 // The segmentation search has ended. Sets the blame appropriately. 282 void FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, std::string &debug_str); 283 284 // If the bundle is null or still does not indicate the correct result, 285 // fix it and use some backup reason for the blame. 286 static void LastChanceBlame(bool debug, WERD_RES *word); 287 288 // Sets the misadaption debug if this word is incorrect, as this word is 289 // being adapted to. 290 void SetMisAdaptionDebug(const WERD_CHOICE *best_choice, bool debug); 291 292 private: 293 // Copy assignment operator (currently unused, therefore private). 294 BlamerBundle &operator=(const BlamerBundle &other) = delete; SetBlameBlamerBundle295 void SetBlame(IncorrectResultReason irr, const std::string &msg, const WERD_CHOICE *choice, 296 bool debug) { 297 incorrect_result_reason_ = irr; 298 debug_ = IncorrectReason(); 299 debug_ += " to blame: "; 300 FillDebugString(msg, choice, debug_); 301 if (debug) { 302 tprintf("SetBlame(): %s", debug_.c_str()); 303 } 304 } 305 306 private: 307 // Set to true when bounding boxes for individual unichars are recorded. 308 bool truth_has_char_boxes_; 309 // Variables used by the segmentation search when looking for the blame. 310 // Set to true while segmentation search is continued after the usual 311 // termination condition in order to look for the blame. 312 bool segsearch_is_looking_for_blame_; 313 // Set to true if best choice is a dictionary word and 314 // classifier's top choice. 315 bool best_choice_is_dict_and_top_choice_; 316 // Tolerance for bounding box comparisons in normalized space. 317 int norm_box_tolerance_; 318 // The true_word (in the original image coordinate space) contains ground 319 // truth bounding boxes for this WERD_RES. 320 tesseract::BoxWord truth_word_; 321 // Same as above, but in normalized coordinates 322 // (filled in by WERD_RES::SetupForRecognition()). 323 tesseract::BoxWord norm_truth_word_; 324 // Contains ground truth unichar for each of the bounding boxes in truth_word. 325 std::vector<std::string> truth_text_; 326 // The reason for incorrect OCR result. 327 IncorrectResultReason incorrect_result_reason_; 328 // Debug text associated with the blame. 329 std::string debug_; 330 // Misadaption debug information (filled in if this word was misadapted to). 331 std::string misadaption_debug_; 332 // Vectors populated by SegSearch to indicate column and row indices that 333 // correspond to blobs with correct bounding boxes. 334 std::vector<int> correct_segmentation_cols_; 335 std::vector<int> correct_segmentation_rows_; 336 // Best rating for correctly segmented path 337 // (set and used by SegSearch when looking for blame). 338 float best_correctly_segmented_rating_; 339 int lattice_size_; // size of lattice_data in bytes 340 // Serialized segmentation search lattice. 341 char *lattice_data_; 342 // Information about hypotheses (paths) explored by the segmentation search. 343 #ifndef DISABLED_LEGACY_ENGINE 344 tesseract::ParamsTrainingBundle params_training_bundle_; 345 #endif // ndef DISABLED_LEGACY_ENGINE 346 }; 347 348 } // namespace tesseract 349 350 #endif // TESSERACT_CCSTRUCT_BLAMER_H_ 351