1 ///////////////////////////////////////////////////////////////////////
2 // File:        blamer.cpp
3 // Description: Module allowing precise error causes to be allocated.
4 // Author:      Rike Antonova
5 // Refactored:  Ray Smith
6 //
7 // (C) Copyright 2013, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 ///////////////////////////////////////////////////////////////////////
19 
20 #include "blamer.h"
21 
22 #include "blobs.h"   // for TPOINT, TWERD, TBLOB
23 #include "errcode.h" // for ASSERT_HOST
24 #if !defined(DISABLED_LEGACY_ENGINE)
25 #  include "lm_pain_points.h" // for LMPainPoints
26 #endif
27 #include "matrix.h"     // for MATRIX
28 #include "normalis.h"   // for DENORM
29 #include "pageres.h"    // for WERD_RES
30 #include "unicharset.h" // for UNICHARSET
31 
32 #include <cmath>   // for abs
33 #include <cstdlib> // for abs
34 
35 namespace tesseract {
36 
// Short display names, one per value of the IncorrectResultReason enum.
// The lookup table below is indexed by the enum value, so the order of its
// entries must be kept in sync with the enum declaration.
constexpr char kBlameCorrect[] = "corr";
constexpr char kBlameClassifier[] = "cl";
constexpr char kBlameChopper[] = "chop";
constexpr char kBlameClassLMTradeoff[] = "cl/LM";
constexpr char kBlamePageLayout[] = "pglt";
constexpr char kBlameSegsearchHeur[] = "ss_heur";
constexpr char kBlameSegsearchPP[] = "ss_pp";
constexpr char kBlameClassOldLMTradeoff[] = "cl/old_LM";
constexpr char kBlameAdaption[] = "adapt";
constexpr char kBlameNoTruthSplit[] = "no_tr_spl";
constexpr char kBlameNoTruth[] = "no_tr";
constexpr char kBlameUnknown[] = "unkn";

// Name table: entry i is the name reported for the reason whose value is i.
const char *const kIncorrectResultReasonNames[] = {
    kBlameCorrect,
    kBlameClassifier,
    kBlameChopper,
    kBlameClassLMTradeoff,
    kBlamePageLayout,
    kBlameSegsearchHeur,
    kBlameSegsearchPP,
    kBlameClassOldLMTradeoff,
    kBlameAdaption,
    kBlameNoTruthSplit,
    kBlameNoTruth,
    kBlameUnknown,
};
55 
IncorrectReasonName(IncorrectResultReason irr)56 const char *BlamerBundle::IncorrectReasonName(IncorrectResultReason irr) {
57   return kIncorrectResultReasonNames[irr];
58 }
59 
IncorrectReason() const60 const char *BlamerBundle::IncorrectReason() const {
61   return kIncorrectResultReasonNames[incorrect_result_reason_];
62 }
63 
64 // Functions to setup the blamer.
65 // Whole word string, whole word bounding box.
SetWordTruth(const UNICHARSET & unicharset,const char * truth_str,const TBOX & word_box)66 void BlamerBundle::SetWordTruth(const UNICHARSET &unicharset, const char *truth_str,
67                                 const TBOX &word_box) {
68   truth_word_.InsertBox(0, word_box);
69   truth_has_char_boxes_ = false;
70   // Encode the string as UNICHAR_IDs.
71   std::vector<UNICHAR_ID> encoding;
72   std::vector<char> lengths;
73   unicharset.encode_string(truth_str, false, &encoding, &lengths, nullptr);
74   int total_length = 0;
75   for (size_t i = 0; i < encoding.size(); total_length += lengths[i++]) {
76     std::string uch(truth_str + total_length);
77     uch.resize(lengths[i] - total_length);
78     UNICHAR_ID id = encoding[i];
79     if (id != INVALID_UNICHAR_ID) {
80       uch = unicharset.get_normed_unichar(id);
81     }
82     truth_text_.push_back(uch);
83   }
84 }
85 
86 // Single "character" string, "character" bounding box.
87 // May be called multiple times to indicate the characters in a word.
SetSymbolTruth(const UNICHARSET & unicharset,const char * char_str,const TBOX & char_box)88 void BlamerBundle::SetSymbolTruth(const UNICHARSET &unicharset, const char *char_str,
89                                   const TBOX &char_box) {
90   std::string symbol_str(char_str);
91   UNICHAR_ID id = unicharset.unichar_to_id(char_str);
92   if (id != INVALID_UNICHAR_ID) {
93     std::string normed_uch(unicharset.get_normed_unichar(id));
94     if (normed_uch.length() > 0) {
95       symbol_str = normed_uch;
96     }
97   }
98   int length = truth_word_.length();
99   truth_text_.push_back(symbol_str);
100   truth_word_.InsertBox(length, char_box);
101   if (length == 0) {
102     truth_has_char_boxes_ = true;
103   } else if (truth_word_.BlobBox(length - 1) == char_box) {
104     truth_has_char_boxes_ = false;
105   }
106 }
107 
108 // Marks that there is something wrong with the truth text, like it contains
109 // reject characters.
SetRejectedTruth()110 void BlamerBundle::SetRejectedTruth() {
111   incorrect_result_reason_ = IRR_NO_TRUTH;
112   truth_has_char_boxes_ = false;
113 }
114 
115 // Returns true if the provided word_choice is correct.
ChoiceIsCorrect(const WERD_CHOICE * word_choice) const116 bool BlamerBundle::ChoiceIsCorrect(const WERD_CHOICE *word_choice) const {
117   if (word_choice == nullptr) {
118     return false;
119   }
120   const UNICHARSET *uni_set = word_choice->unicharset();
121   std::string normed_choice_str;
122   for (unsigned i = 0; i < word_choice->length(); ++i) {
123     normed_choice_str += uni_set->get_normed_unichar(word_choice->unichar_id(i));
124   }
125   std::string truth_str = TruthString();
126   return truth_str == normed_choice_str;
127 }
128 
FillDebugString(const std::string & msg,const WERD_CHOICE * choice,std::string & debug)129 void BlamerBundle::FillDebugString(const std::string &msg, const WERD_CHOICE *choice, std::string &debug) {
130   debug += "Truth ";
131   for (auto &text : this->truth_text_) {
132     debug += text;
133   }
134   if (!this->truth_has_char_boxes_) {
135     debug += " (no char boxes)";
136   }
137   if (choice != nullptr) {
138     debug += " Choice ";
139     std::string choice_str;
140     choice->string_and_lengths(&choice_str, nullptr);
141     debug += choice_str;
142   }
143   if (msg.length() > 0) {
144     debug += "\n";
145     debug += msg;
146   }
147   debug += "\n";
148 }
149 
150 // Sets up the norm_truth_word from truth_word using the given DENORM.
SetupNormTruthWord(const DENORM & denorm)151 void BlamerBundle::SetupNormTruthWord(const DENORM &denorm) {
152   // TODO(rays) Is this the last use of denorm in WERD_RES and can it go?
153   norm_box_tolerance_ = kBlamerBoxTolerance * denorm.x_scale();
154   TPOINT topleft;
155   TPOINT botright;
156   TPOINT norm_topleft;
157   TPOINT norm_botright;
158   for (unsigned b = 0; b < truth_word_.length(); ++b) {
159     const TBOX &box = truth_word_.BlobBox(b);
160     topleft.x = box.left();
161     topleft.y = box.top();
162     botright.x = box.right();
163     botright.y = box.bottom();
164     denorm.NormTransform(nullptr, topleft, &norm_topleft);
165     denorm.NormTransform(nullptr, botright, &norm_botright);
166     TBOX norm_box(norm_topleft.x, norm_botright.y, norm_botright.x, norm_topleft.y);
167     norm_truth_word_.InsertBox(b, norm_box);
168   }
169 }
170 
171 // Splits *this into two pieces in bundle1 and bundle2 (preallocated, empty
172 // bundles) where the right edge/ of the left-hand word is word1_right,
173 // and the left edge of the right-hand word is word2_left.
SplitBundle(int word1_right,int word2_left,bool debug,BlamerBundle * bundle1,BlamerBundle * bundle2) const174 void BlamerBundle::SplitBundle(int word1_right, int word2_left, bool debug, BlamerBundle *bundle1,
175                                BlamerBundle *bundle2) const {
176   std::string debug_str;
177   // Find truth boxes that correspond to the split in the blobs.
178   unsigned begin2_truth_index = 0;
179   if (incorrect_result_reason_ != IRR_NO_TRUTH && truth_has_char_boxes_) {
180     debug_str = "Looking for truth split at";
181     debug_str += " end1_x " + std::to_string(word1_right);
182     debug_str += " begin2_x " + std::to_string(word2_left);
183     debug_str += "\nnorm_truth_word boxes:\n";
184     if (norm_truth_word_.length() > 1) {
185       norm_truth_word_.BlobBox(0).print_to_str(debug_str);
186       for (unsigned b = 1; b < norm_truth_word_.length(); ++b) {
187         norm_truth_word_.BlobBox(b).print_to_str(debug_str);
188         if ((abs(word1_right - norm_truth_word_.BlobBox(b - 1).right()) < norm_box_tolerance_) &&
189             (abs(word2_left - norm_truth_word_.BlobBox(b).left()) < norm_box_tolerance_)) {
190           begin2_truth_index = b;
191           debug_str += "Split found";
192           break;
193         }
194       }
195       debug_str += '\n';
196     }
197   }
198   // Populate truth information in word and word2 with the first and second
199   // part of the original truth.
200   if (begin2_truth_index > 0) {
201     bundle1->truth_has_char_boxes_ = true;
202     bundle1->norm_box_tolerance_ = norm_box_tolerance_;
203     bundle2->truth_has_char_boxes_ = true;
204     bundle2->norm_box_tolerance_ = norm_box_tolerance_;
205     BlamerBundle *curr_bb = bundle1;
206     for (unsigned b = 0; b < norm_truth_word_.length(); ++b) {
207       if (b == begin2_truth_index) {
208         curr_bb = bundle2;
209       }
210       curr_bb->norm_truth_word_.InsertBox(b, norm_truth_word_.BlobBox(b));
211       curr_bb->truth_word_.InsertBox(b, truth_word_.BlobBox(b));
212       curr_bb->truth_text_.push_back(truth_text_[b]);
213     }
214   } else if (incorrect_result_reason_ == IRR_NO_TRUTH) {
215     bundle1->incorrect_result_reason_ = IRR_NO_TRUTH;
216     bundle2->incorrect_result_reason_ = IRR_NO_TRUTH;
217   } else {
218     debug_str += "Truth split not found";
219     debug_str += truth_has_char_boxes_ ? "\n" : " (no truth char boxes)\n";
220     bundle1->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, nullptr, debug);
221     bundle2->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, nullptr, debug);
222   }
223 }
224 
225 // "Joins" the blames from bundle1 and bundle2 into *this.
JoinBlames(const BlamerBundle & bundle1,const BlamerBundle & bundle2,bool debug)226 void BlamerBundle::JoinBlames(const BlamerBundle &bundle1, const BlamerBundle &bundle2,
227                               bool debug) {
228   std::string debug_str;
229   IncorrectResultReason irr = incorrect_result_reason_;
230   if (irr != IRR_NO_TRUTH_SPLIT) {
231     debug_str = "";
232   }
233   if (bundle1.incorrect_result_reason_ != IRR_CORRECT &&
234       bundle1.incorrect_result_reason_ != IRR_NO_TRUTH &&
235       bundle1.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
236     debug_str += "Blame from part 1: ";
237     debug_str += bundle1.debug_;
238     irr = bundle1.incorrect_result_reason_;
239   }
240   if (bundle2.incorrect_result_reason_ != IRR_CORRECT &&
241       bundle2.incorrect_result_reason_ != IRR_NO_TRUTH &&
242       bundle2.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
243     debug_str += "Blame from part 2: ";
244     debug_str += bundle2.debug_;
245     if (irr == IRR_CORRECT) {
246       irr = bundle2.incorrect_result_reason_;
247     } else if (irr != bundle2.incorrect_result_reason_) {
248       irr = IRR_UNKNOWN;
249     }
250   }
251   incorrect_result_reason_ = irr;
252   if (irr != IRR_CORRECT && irr != IRR_NO_TRUTH) {
253     SetBlame(irr, debug_str, nullptr, debug);
254   }
255 }
256 
257 // If a blob with the same bounding box as one of the truth character
258 // bounding boxes is not classified as the corresponding truth character
259 // blames character classifier for incorrect answer.
BlameClassifier(const UNICHARSET & unicharset,const TBOX & blob_box,const BLOB_CHOICE_LIST & choices,bool debug)260 void BlamerBundle::BlameClassifier(const UNICHARSET &unicharset, const TBOX &blob_box,
261                                    const BLOB_CHOICE_LIST &choices, bool debug) {
262   if (!truth_has_char_boxes_ || incorrect_result_reason_ != IRR_CORRECT) {
263     return; // Nothing to do here.
264   }
265 
266   for (unsigned b = 0; b < norm_truth_word_.length(); ++b) {
267     const TBOX &truth_box = norm_truth_word_.BlobBox(b);
268     // Note that we are more strict on the bounding box boundaries here
269     // than in other places (chopper, segmentation search), since we do
270     // not have the ability to check the previous and next bounding box.
271     if (blob_box.x_almost_equal(truth_box, norm_box_tolerance_ / 2)) {
272       bool found = false;
273       bool incorrect_adapted = false;
274       UNICHAR_ID incorrect_adapted_id = INVALID_UNICHAR_ID;
275       const char *truth_str = truth_text_[b].c_str();
276       // We promise not to modify the list or its contents, using a
277       // const BLOB_CHOICE* below.
278       BLOB_CHOICE_IT choices_it(const_cast<BLOB_CHOICE_LIST *>(&choices));
279       for (choices_it.mark_cycle_pt(); !choices_it.cycled_list(); choices_it.forward()) {
280         const BLOB_CHOICE *choice = choices_it.data();
281         if (strcmp(truth_str, unicharset.get_normed_unichar(choice->unichar_id())) == 0) {
282           found = true;
283           break;
284         } else if (choice->IsAdapted()) {
285           incorrect_adapted = true;
286           incorrect_adapted_id = choice->unichar_id();
287         }
288       } // end choices_it for loop
289       if (!found) {
290         std::string debug_str = "unichar ";
291         debug_str += truth_str;
292         debug_str += " not found in classification list";
293         SetBlame(IRR_CLASSIFIER, debug_str, nullptr, debug);
294       } else if (incorrect_adapted) {
295         std::string debug_str = "better rating for adapted ";
296         debug_str += unicharset.id_to_unichar(incorrect_adapted_id);
297         debug_str += " than for correct ";
298         debug_str += truth_str;
299         SetBlame(IRR_ADAPTION, debug_str, nullptr, debug);
300       }
301       break;
302     }
303   } // end iterating over blamer_bundle->norm_truth_word
304 }
305 
306 // Checks whether chops were made at all the character bounding box
307 // boundaries in word->truth_word. If not - blames the chopper for an
308 // incorrect answer.
SetChopperBlame(const WERD_RES * word,bool debug)309 void BlamerBundle::SetChopperBlame(const WERD_RES *word, bool debug) {
310   if (NoTruth() || !truth_has_char_boxes_ || word->chopped_word->blobs.empty()) {
311     return;
312   }
313   bool missing_chop = false;
314   int num_blobs = word->chopped_word->blobs.size();
315   unsigned box_index = 0;
316   int blob_index = 0;
317   int16_t truth_x = -1;
318   while (box_index < truth_word_.length() && blob_index < num_blobs) {
319     truth_x = norm_truth_word_.BlobBox(box_index).right();
320     TBLOB *curr_blob = word->chopped_word->blobs[blob_index];
321     if (curr_blob->bounding_box().right() < truth_x - norm_box_tolerance_) {
322       ++blob_index;
323       continue; // encountered an extra chop, keep looking
324     } else if (curr_blob->bounding_box().right() > truth_x + norm_box_tolerance_) {
325       missing_chop = true;
326       break;
327     } else {
328       ++blob_index;
329     }
330   }
331   if (missing_chop || box_index < norm_truth_word_.length()) {
332     std::string debug_str;
333     if (missing_chop) {
334       debug_str += "Detected missing chop (tolerance=" + std::to_string(norm_box_tolerance_);
335       debug_str += ") at Bounding Box=";
336       TBLOB *curr_blob = word->chopped_word->blobs[blob_index];
337       curr_blob->bounding_box().print_to_str(debug_str);
338       debug_str += "\nNo chop for truth at x=" + std::to_string(truth_x);
339     } else {
340       debug_str += "Missing chops for last " + std::to_string(norm_truth_word_.length() - box_index);
341       debug_str += " truth box(es)";
342     }
343     debug_str += "\nMaximally chopped word boxes:\n";
344     for (blob_index = 0; blob_index < num_blobs; ++blob_index) {
345       TBLOB *curr_blob = word->chopped_word->blobs[blob_index];
346       curr_blob->bounding_box().print_to_str(debug_str);
347       debug_str += '\n';
348     }
349     debug_str += "Truth  bounding  boxes:\n";
350     for (box_index = 0; box_index < norm_truth_word_.length(); ++box_index) {
351       norm_truth_word_.BlobBox(box_index).print_to_str(debug_str);
352       debug_str += '\n';
353     }
354     SetBlame(IRR_CHOPPER, debug_str, word->best_choice, debug);
355   }
356 }
357 
358 // Blames the classifier or the language model if, after running only the
359 // chopper, best_choice is incorrect and no blame has been yet set.
360 // Blames the classifier if best_choice is classifier's top choice and is a
361 // dictionary word (i.e. language model could not have helped).
362 // Otherwise, blames the language model (formerly permuter word adjustment).
BlameClassifierOrLangModel(const WERD_RES * word,const UNICHARSET & unicharset,bool valid_permuter,bool debug)363 void BlamerBundle::BlameClassifierOrLangModel(const WERD_RES *word, const UNICHARSET &unicharset,
364                                               bool valid_permuter, bool debug) {
365   if (valid_permuter) {
366     // Find out whether best choice is a top choice.
367     best_choice_is_dict_and_top_choice_ = true;
368     for (unsigned i = 0; i < word->best_choice->length(); ++i) {
369       BLOB_CHOICE_IT blob_choice_it(word->GetBlobChoices(i));
370       ASSERT_HOST(!blob_choice_it.empty());
371       BLOB_CHOICE *first_choice = nullptr;
372       for (blob_choice_it.mark_cycle_pt(); !blob_choice_it.cycled_list();
373            blob_choice_it.forward()) { // find first non-fragment choice
374         if (!(unicharset.get_fragment(blob_choice_it.data()->unichar_id()))) {
375           first_choice = blob_choice_it.data();
376           break;
377         }
378       }
379       ASSERT_HOST(first_choice != nullptr);
380       if (first_choice->unichar_id() != word->best_choice->unichar_id(i)) {
381         best_choice_is_dict_and_top_choice_ = false;
382         break;
383       }
384     }
385   }
386   std::string debug_str;
387   if (best_choice_is_dict_and_top_choice_) {
388     debug_str = "Best choice is: incorrect, top choice, dictionary word";
389     debug_str += " with permuter ";
390     debug_str += word->best_choice->permuter_name();
391   } else {
392     debug_str = "Classifier/Old LM tradeoff is to blame";
393   }
394   SetBlame(best_choice_is_dict_and_top_choice_ ? IRR_CLASSIFIER : IRR_CLASS_OLD_LM_TRADEOFF,
395            debug_str, word->best_choice, debug);
396 }
397 
398 // Sets up the correct_segmentation_* to mark the correct bounding boxes.
SetupCorrectSegmentation(const TWERD * word,bool debug)399 void BlamerBundle::SetupCorrectSegmentation(const TWERD *word, bool debug) {
400 #ifndef DISABLED_LEGACY_ENGINE
401   params_training_bundle_.StartHypothesisList();
402 #endif //  ndef DISABLED_LEGACY_ENGINE
403   if (incorrect_result_reason_ != IRR_CORRECT || !truth_has_char_boxes_) {
404     return; // Nothing to do here.
405   }
406 
407   std::string debug_str = "Blamer computing correct_segmentation_cols\n";
408   int curr_box_col = 0;
409   int next_box_col = 0;
410   int num_blobs = word->NumBlobs();
411   if (num_blobs == 0) {
412     return; // No blobs to play with.
413   }
414   int blob_index = 0;
415   int16_t next_box_x = word->blobs[blob_index]->bounding_box().right();
416   for (unsigned truth_idx = 0; blob_index < num_blobs && truth_idx < norm_truth_word_.length();
417        ++blob_index) {
418     ++next_box_col;
419     int16_t curr_box_x = next_box_x;
420     if (blob_index + 1 < num_blobs) {
421       next_box_x = word->blobs[blob_index + 1]->bounding_box().right();
422     }
423     int16_t truth_x = norm_truth_word_.BlobBox(truth_idx).right();
424     debug_str += "Box x coord vs. truth: " + std::to_string(curr_box_x);
425     debug_str += " " + std::to_string(truth_x);
426     debug_str += "\n";
427     if (curr_box_x > (truth_x + norm_box_tolerance_)) {
428       break;                                                  // failed to find a matching box
429     } else if (curr_box_x >= truth_x - norm_box_tolerance_ && // matched
430                (blob_index + 1 >= num_blobs ||                // next box can't be included
431                 next_box_x > truth_x + norm_box_tolerance_)) {
432       correct_segmentation_cols_.push_back(curr_box_col);
433       correct_segmentation_rows_.push_back(next_box_col - 1);
434       ++truth_idx;
435       debug_str += "col=" + std::to_string(curr_box_col);
436       debug_str += " row=" + std::to_string(next_box_col - 1);
437       debug_str += "\n";
438       curr_box_col = next_box_col;
439     }
440   }
441   if (blob_index < num_blobs || // trailing blobs
442       correct_segmentation_cols_.size() != norm_truth_word_.length()) {
443     debug_str +=
444         "Blamer failed to find correct segmentation"
445         " (tolerance=" +
446         std::to_string(norm_box_tolerance_);
447     if (blob_index >= num_blobs) {
448       debug_str += " blob == nullptr";
449     }
450     debug_str += ")\n";
451     debug_str += " path length " + std::to_string(correct_segmentation_cols_.size());
452     debug_str += " vs. truth " + std::to_string(norm_truth_word_.length());
453     debug_str += "\n";
454     SetBlame(IRR_UNKNOWN, debug_str, nullptr, debug);
455     correct_segmentation_cols_.clear();
456     correct_segmentation_rows_.clear();
457   }
458 }
459 
460 // Returns true if a guided segmentation search is needed.
GuidedSegsearchNeeded(const WERD_CHOICE * best_choice) const461 bool BlamerBundle::GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const {
462   return incorrect_result_reason_ == IRR_CORRECT && !segsearch_is_looking_for_blame_ &&
463          truth_has_char_boxes_ && !ChoiceIsCorrect(best_choice);
464 }
465 
466 #if !defined(DISABLED_LEGACY_ENGINE)
467 // Setup ready to guide the segmentation search to the correct segmentation.
InitForSegSearch(const WERD_CHOICE * best_choice,MATRIX * ratings,UNICHAR_ID wildcard_id,bool debug,std::string & debug_str,tesseract::LMPainPoints * pain_points,double max_char_wh_ratio,WERD_RES * word_res)468 void BlamerBundle::InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *ratings,
469                                     UNICHAR_ID wildcard_id, bool debug, std::string &debug_str,
470                                     tesseract::LMPainPoints *pain_points, double max_char_wh_ratio,
471                                     WERD_RES *word_res) {
472   segsearch_is_looking_for_blame_ = true;
473   if (debug) {
474     tprintf("segsearch starting to look for blame\n");
475   }
476   // Fill pain points for any unclassifed blob corresponding to the
477   // correct segmentation state.
478   debug_str += "Correct segmentation:\n";
479   for (unsigned idx = 0; idx < correct_segmentation_cols_.size(); ++idx) {
480     debug_str += "col=" + std::to_string(correct_segmentation_cols_[idx]);
481     debug_str += " row=" + std::to_string(correct_segmentation_rows_[idx]);
482     debug_str += "\n";
483     if (!ratings->Classified(correct_segmentation_cols_[idx], correct_segmentation_rows_[idx],
484                              wildcard_id) &&
485         !pain_points->GeneratePainPoint(
486             correct_segmentation_cols_[idx], correct_segmentation_rows_[idx],
487             tesseract::LM_PPTYPE_BLAMER, 0.0, false, max_char_wh_ratio, word_res)) {
488       segsearch_is_looking_for_blame_ = false;
489       debug_str += "\nFailed to insert pain point\n";
490       SetBlame(IRR_SEGSEARCH_HEUR, debug_str, best_choice, debug);
491       break;
492     }
493   } // end for blamer_bundle->correct_segmentation_cols/rows
494 }
495 #endif // !defined(DISABLED_LEGACY_ENGINE)
496 
497 // Returns true if the guided segsearch is in progress.
GuidedSegsearchStillGoing() const498 bool BlamerBundle::GuidedSegsearchStillGoing() const {
499   return segsearch_is_looking_for_blame_;
500 }
501 
502 // The segmentation search has ended. Sets the blame appropriately.
FinishSegSearch(const WERD_CHOICE * best_choice,bool debug,std::string & debug_str)503 void BlamerBundle::FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, std::string &debug_str) {
504   // If we are still looking for blame (i.e. best_choice is incorrect, but a
505   // path representing the correct segmentation could be constructed), we can
506   // blame segmentation search pain point prioritization if the rating of the
507   // path corresponding to the correct segmentation is better than that of
508   // best_choice (i.e. language model would have done the correct thing, but
509   // because of poor pain point prioritization the correct segmentation was
510   // never explored). Otherwise we blame the tradeoff between the language model
511   // and the classifier, since even after exploring the path corresponding to
512   // the correct segmentation incorrect best_choice would have been chosen.
513   // One special case when we blame the classifier instead is when best choice
514   // is incorrect, but it is a dictionary word and it classifier's top choice.
515   if (segsearch_is_looking_for_blame_) {
516     segsearch_is_looking_for_blame_ = false;
517     if (best_choice_is_dict_and_top_choice_) {
518       debug_str = "Best choice is: incorrect, top choice, dictionary word";
519       debug_str += " with permuter ";
520       debug_str += best_choice->permuter_name();
521       SetBlame(IRR_CLASSIFIER, debug_str, best_choice, debug);
522     } else if (best_correctly_segmented_rating_ < best_choice->rating()) {
523       debug_str += "Correct segmentation state was not explored";
524       SetBlame(IRR_SEGSEARCH_PP, debug_str, best_choice, debug);
525     } else {
526       if (best_correctly_segmented_rating_ >= WERD_CHOICE::kBadRating) {
527         debug_str += "Correct segmentation paths were pruned by LM\n";
528       } else {
529         debug_str += "Best correct segmentation rating " +
530                                   std::to_string(best_correctly_segmented_rating_);
531         debug_str += " vs. best choice rating " + std::to_string(best_choice->rating());
532       }
533       SetBlame(IRR_CLASS_LM_TRADEOFF, debug_str, best_choice, debug);
534     }
535   }
536 }
537 
538 // If the bundle is null or still does not indicate the correct result,
539 // fix it and use some backup reason for the blame.
LastChanceBlame(bool debug,WERD_RES * word)540 void BlamerBundle::LastChanceBlame(bool debug, WERD_RES *word) {
541   if (word->blamer_bundle == nullptr) {
542     word->blamer_bundle = new BlamerBundle();
543     word->blamer_bundle->SetBlame(IRR_PAGE_LAYOUT, "LastChanceBlame", word->best_choice, debug);
544   } else if (word->blamer_bundle->incorrect_result_reason_ == IRR_NO_TRUTH) {
545     word->blamer_bundle->SetBlame(IRR_NO_TRUTH, "Rejected truth", word->best_choice, debug);
546   } else {
547     bool correct = word->blamer_bundle->ChoiceIsCorrect(word->best_choice);
548     IncorrectResultReason irr = word->blamer_bundle->incorrect_result_reason_;
549     if (irr == IRR_CORRECT && !correct) {
550       std::string debug_str = "Choice is incorrect after recognition";
551       word->blamer_bundle->SetBlame(IRR_UNKNOWN, debug_str, word->best_choice, debug);
552     } else if (irr != IRR_CORRECT && correct) {
553       if (debug) {
554         tprintf("Corrected %s\n", word->blamer_bundle->debug_.c_str());
555       }
556       word->blamer_bundle->incorrect_result_reason_ = IRR_CORRECT;
557       word->blamer_bundle->debug_ = "";
558     }
559   }
560 }
561 
562 // Sets the misadaption debug if this word is incorrect, as this word is
563 // being adapted to.
SetMisAdaptionDebug(const WERD_CHOICE * best_choice,bool debug)564 void BlamerBundle::SetMisAdaptionDebug(const WERD_CHOICE *best_choice, bool debug) {
565   if (incorrect_result_reason_ != IRR_NO_TRUTH && !ChoiceIsCorrect(best_choice)) {
566     misadaption_debug_ = "misadapt to word (";
567     misadaption_debug_ += best_choice->permuter_name();
568     misadaption_debug_ += "): ";
569     FillDebugString("", best_choice, misadaption_debug_);
570     if (debug) {
571       tprintf("%s\n", misadaption_debug_.c_str());
572     }
573   }
574 }
575 
576 } // namespace tesseract
577