1 /////////////////////////////////////////////////////////////////////// 2 // File: osdetect.h 3 // Description: Orientation and script detection. 4 // Author: Samuel Charron 5 // Ranjith Unnikrishnan 6 // 7 // (C) Copyright 2008, Google Inc. 8 // Licensed under the Apache License, Version 2.0 (the "License"); 9 // you may not use this file except in compliance with the License. 10 // You may obtain a copy of the License at 11 // http://www.apache.org/licenses/LICENSE-2.0 12 // Unless required by applicable law or agreed to in writing, software 13 // distributed under the License is distributed on an "AS IS" BASIS, 14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 // See the License for the specific language governing permissions and 16 // limitations under the License. 17 // 18 /////////////////////////////////////////////////////////////////////// 19 20 #ifndef TESSERACT_CCMAIN_OSDETECT_H_ 21 #define TESSERACT_CCMAIN_OSDETECT_H_ 22 23 #include "export.h" // for TESS_API 24 25 #include <vector> // for std::vector 26 27 namespace tesseract { 28 29 class BLOBNBOX; 30 class BLOBNBOX_CLIST; 31 class BLOB_CHOICE_LIST; 32 class TO_BLOCK_LIST; 33 class UNICHARSET; 34 35 class Tesseract; 36 37 // Max number of scripts in ICU + "NULL" + Japanese and Korean + Fraktur 38 const int kMaxNumberOfScripts = 116 + 1 + 2 + 1; 39 40 struct OSBestResult { OSBestResultOSBestResult41 OSBestResult() 42 : orientation_id(0), script_id(0), sconfidence(0.0), oconfidence(0.0) {} 43 int orientation_id; 44 int script_id; 45 float sconfidence; 46 float oconfidence; 47 }; 48 49 struct OSResults { OSResultsOSResults50 OSResults() : unicharset(nullptr) { 51 for (int i = 0; i < 4; ++i) { 52 for (int j = 0; j < kMaxNumberOfScripts; ++j) { 53 scripts_na[i][j] = 0; 54 } 55 orientations[i] = 0; 56 } 57 } 58 void update_best_orientation(); 59 // Set the estimate of the orientation to the given id. 60 void set_best_orientation(int orientation_id); 61 // Update/Compute the best estimate of the script assuming the given 62 // orientation id. 63 void update_best_script(int orientation_id); 64 // Return the index of the script with the highest score for this orientation. 65 TESS_API int get_best_script(int orientation_id) const; 66 // Accumulate scores with given OSResults instance and update the best script. 67 void accumulate(const OSResults &osr); 68 69 // Print statistics. 70 void print_scores(void) const; 71 void print_scores(int orientation_id) const; 72 73 // Array holding scores for each orientation id [0,3]. 74 // Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the 75 // page respectively, where the values refer to the amount of clockwise 76 // rotation to be applied to the page for the text to be upright and readable. 77 float orientations[4]; 78 // Script confidence scores for each of 4 possible orientations. 79 float scripts_na[4][kMaxNumberOfScripts]; 80 81 UNICHARSET *unicharset; 82 OSBestResult best_result; 83 }; 84 85 class OrientationDetector { 86 public: 87 OrientationDetector(const std::vector<int> *allowed_scripts, 88 OSResults *results); 89 bool detect_blob(BLOB_CHOICE_LIST *scores); 90 int get_orientation(); 91 92 private: 93 OSResults *osr_; 94 const std::vector<int> *allowed_scripts_; 95 }; 96 97 class ScriptDetector { 98 public: 99 ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr, 100 tesseract::Tesseract *tess); 101 void detect_blob(BLOB_CHOICE_LIST *scores); 102 bool must_stop(int orientation) const; 103 104 private: 105 OSResults *osr_; 106 static const char *korean_script_; 107 static const char *japanese_script_; 108 static const char *fraktur_script_; 109 int korean_id_; 110 int japanese_id_; 111 int katakana_id_; 112 int hiragana_id_; 113 int han_id_; 114 int hangul_id_; 115 int latin_id_; 116 int fraktur_id_; 117 tesseract::Tesseract *tess_; 118 const std::vector<int> *allowed_scripts_; 119 }; 120 121 int orientation_and_script_detection(const char *filename, OSResults *, 122 tesseract::Tesseract *); 123 124 int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr, 125 tesseract::Tesseract *tess); 126 127 int os_detect_blobs(const std::vector<int> *allowed_scripts, 128 BLOBNBOX_CLIST *blob_list, OSResults *osr, 129 tesseract::Tesseract *tess); 130 131 bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s, 132 OSResults *, tesseract::Tesseract *tess); 133 134 // Helper method to convert an orientation index to its value in degrees. 135 // The value represents the amount of clockwise rotation in degrees that must be 136 // applied for the text to be upright (readable). 137 TESS_API int OrientationIdToValue(const int &id); 138 139 } // namespace tesseract 140 141 #endif // TESSERACT_CCMAIN_OSDETECT_H_ 142