1 ///////////////////////////////////////////////////////////////////////
2 // File:        osdetect.h
3 // Description: Orientation and script detection.
4 // Author:      Samuel Charron
5 //              Ranjith Unnikrishnan
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 ///////////////////////////////////////////////////////////////////////
19 
20 #ifndef TESSERACT_CCMAIN_OSDETECT_H_
21 #define TESSERACT_CCMAIN_OSDETECT_H_
22 
23 #include "export.h" // for TESS_API
24 
25 #include <vector> // for std::vector
26 
27 namespace tesseract {
28 
// Forward declarations: this header only uses these types through pointers
// and references, so their full definitions are not needed here.
class BLOBNBOX;
class BLOBNBOX_CLIST;
class BLOB_CHOICE_LIST;
class TO_BLOCK_LIST;
class UNICHARSET;
class Tesseract;
36 
// Upper bound on the number of distinct script ids that can be scored.
// It is used as the second dimension of OSResults::scripts_na.
const int kMaxNumberOfScripts = 116 /* scripts in ICU */
                                + 1 /* the "NULL" script */
                                + 2 /* Japanese and Korean */
                                + 1 /* Fraktur */;
39 
// Holds the ids of the currently selected orientation and script together
// with two confidence values. A default-constructed instance has both ids set
// to 0 and both confidences set to 0.0.
struct OSBestResult {
  // All members are zeroed by their default member initializers.
  OSBestResult() = default;

  // Index of the chosen orientation.
  int orientation_id = 0;
  // Index of the chosen script.
  int script_id = 0;
  // NOTE(review): presumably the confidence of the script estimate - confirm
  // against the implementation file.
  float sconfidence = 0.0f;
  // NOTE(review): presumably the confidence of the orientation estimate -
  // confirm against the implementation file.
  float oconfidence = 0.0f;
};
48 
49 struct OSResults {
OSResultsOSResults50   OSResults() : unicharset(nullptr) {
51     for (int i = 0; i < 4; ++i) {
52       for (int j = 0; j < kMaxNumberOfScripts; ++j) {
53         scripts_na[i][j] = 0;
54       }
55       orientations[i] = 0;
56     }
57   }
58   void update_best_orientation();
59   // Set the estimate of the orientation to the given id.
60   void set_best_orientation(int orientation_id);
61   // Update/Compute the best estimate of the script assuming the given
62   // orientation id.
63   void update_best_script(int orientation_id);
64   // Return the index of the script with the highest score for this orientation.
65   TESS_API int get_best_script(int orientation_id) const;
66   // Accumulate scores with given OSResults instance and update the best script.
67   void accumulate(const OSResults &osr);
68 
69   // Print statistics.
70   void print_scores(void) const;
71   void print_scores(int orientation_id) const;
72 
73   // Array holding scores for each orientation id [0,3].
74   // Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the
75   // page respectively, where the values refer to the amount of clockwise
76   // rotation to be applied to the page for the text to be upright and readable.
77   float orientations[4];
78   // Script confidence scores for each of 4 possible orientations.
79   float scripts_na[4][kMaxNumberOfScripts];
80 
81   UNICHARSET *unicharset;
82   OSBestResult best_result;
83 };
84 
85 class OrientationDetector {
86 public:
87   OrientationDetector(const std::vector<int> *allowed_scripts,
88                       OSResults *results);
89   bool detect_blob(BLOB_CHOICE_LIST *scores);
90   int get_orientation();
91 
92 private:
93   OSResults *osr_;
94   const std::vector<int> *allowed_scripts_;
95 };
96 
97 class ScriptDetector {
98 public:
99   ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
100                  tesseract::Tesseract *tess);
101   void detect_blob(BLOB_CHOICE_LIST *scores);
102   bool must_stop(int orientation) const;
103 
104 private:
105   OSResults *osr_;
106   static const char *korean_script_;
107   static const char *japanese_script_;
108   static const char *fraktur_script_;
109   int korean_id_;
110   int japanese_id_;
111   int katakana_id_;
112   int hiragana_id_;
113   int han_id_;
114   int hangul_id_;
115   int latin_id_;
116   int fraktur_id_;
117   tesseract::Tesseract *tess_;
118   const std::vector<int> *allowed_scripts_;
119 };
120 
121 int orientation_and_script_detection(const char *filename, OSResults *,
122                                      tesseract::Tesseract *);
123 
124 int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr,
125               tesseract::Tesseract *tess);
126 
127 int os_detect_blobs(const std::vector<int> *allowed_scripts,
128                     BLOBNBOX_CLIST *blob_list, OSResults *osr,
129                     tesseract::Tesseract *tess);
130 
131 bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s,
132                     OSResults *, tesseract::Tesseract *tess);
133 
134 // Helper method to convert an orientation index to its value in degrees.
135 // The value represents the amount of clockwise rotation in degrees that must be
136 // applied for the text to be upright (readable).
137 TESS_API int OrientationIdToValue(const int &id);
138 
139 } // namespace tesseract
140 
141 #endif // TESSERACT_CCMAIN_OSDETECT_H_
142