1 ///////////////////////////////////////////////////////////////////////
2 // File:        publictypes.h
3 // Description: Types used in both the API and internally
4 // Author:      Ray Smith
5 //
6 // (C) Copyright 2010, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 ///////////////////////////////////////////////////////////////////////
18 
19 #ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
20 #define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
21 
22 namespace tesseract {
23 
24 // This file contains types that are used both by the API and internally
25 // to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
26 // dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
27 // Restated: It is OK for low-level Tesseract files to include publictypes.h,
28 // but not for the low-level tesseract code to include top-level API code.
29 // This file should not use other Tesseract types, as that would drag
30 // their includes into the API-level.
31 
/** Printers' points per inch; the unit in which point sizes are returned. */
constexpr int kPointsPerInch{72};
/**
 * Lowest resolution considered believable. Also serves as the fallback when
 * no other information is available, because under-estimating is safer than
 * over-estimating.
 */
constexpr int kMinCredibleResolution{70};
/** Highest resolution considered believable. */
constexpr int kMaxCredibleResolution{2400};
/**
 * Ratio between the median blob size and the likely resolution, used to
 * estimate the resolution when none is provided. Roughly
 * 1 / (usual text size in inches).
 */
constexpr int kResolutionEstimationFactor{10};
46 
47 /**
48  * Possible types for a POLY_BLOCK or ColPartition.
49  * Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions
50  * below, as well as kPolyBlockNames in layout_test.cc.
51  * Used extensively by ColPartition, and POLY_BLOCK.
52  */
enum PolyBlockType {
  /// Type is not yet known. Keep as the first element.
  PT_UNKNOWN = 0,
  /// Text that lives inside a column.
  PT_FLOWING_TEXT,
  /// Text that spans more than one column.
  PT_HEADING_TEXT,
  /// Text that is in a cross-column pull-out region.
  PT_PULLOUT_TEXT,
  /// Partition belonging to an equation region.
  PT_EQUATION,
  /// Partition has inline equation.
  PT_INLINE_EQUATION,
  /// Partition belonging to a table region.
  PT_TABLE,
  /// Text-line runs vertically.
  PT_VERTICAL_TEXT,
  /// Text that belongs to an image.
  PT_CAPTION_TEXT,
  /// Image that lives inside a column.
  PT_FLOWING_IMAGE,
  /// Image that spans more than one column.
  PT_HEADING_IMAGE,
  /// Image that is in a cross-column pull-out region.
  PT_PULLOUT_IMAGE,
  /// Horizontal Line.
  PT_HORZ_LINE,
  /// Vertical Line.
  PT_VERT_LINE,
  /// Lies outside of any column.
  PT_NOISE,
  /// Number of types listed above.
  PT_COUNT
};
71 
72 /** Returns true if PolyBlockType is of horizontal line type */
PTIsLineType(PolyBlockType type)73 inline bool PTIsLineType(PolyBlockType type) {
74   return type == PT_HORZ_LINE || type == PT_VERT_LINE;
75 }
76 /** Returns true if PolyBlockType is of image type */
PTIsImageType(PolyBlockType type)77 inline bool PTIsImageType(PolyBlockType type) {
78   return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE ||
79          type == PT_PULLOUT_IMAGE;
80 }
81 /** Returns true if PolyBlockType is of text type */
PTIsTextType(PolyBlockType type)82 inline bool PTIsTextType(PolyBlockType type) {
83   return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
84          type == PT_PULLOUT_TEXT || type == PT_TABLE ||
85          type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT ||
86          type == PT_INLINE_EQUATION;
87 }
88 // Returns true if PolyBlockType is of pullout(inter-column) type
PTIsPulloutType(PolyBlockType type)89 inline bool PTIsPulloutType(PolyBlockType type) {
90   return type == PT_PULLOUT_IMAGE || type == PT_PULLOUT_TEXT;
91 }
92 
93 /**
94  *  +------------------+  Orientation Example:
95  *  | 1 Aaaa Aaaa Aaaa |  ====================
96  *  | Aaa aa aaa aa    |  To left is a diagram of some (1) English and
97  *  | aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
98  *  |                2 |
99  *  |   #######  c c C |  Upright Latin characters are represented as A and a.
100  *  |   #######  c c c |  '<' represents a latin character rotated
101  *  | < #######  c c c |      anti-clockwise 90 degrees.
102  *  | < #######  c   c |
103  *  | < #######  .   c |  Upright Chinese characters are represented C and c.
104  *  | 3 #######      c |
105  *  +------------------+  NOTA BENE: enum values here should match goodoc.proto
 *
107  * If you orient your head so that "up" aligns with Orientation,
108  * then the characters will appear "right side up" and readable.
109  *
110  * In the example above, both the English and Chinese paragraphs are oriented
111  * so their "up" is the top of the page (page up).  The photo credit is read
112  * with one's head turned leftward ("up" is to page left).
113  *
114  * The values of this enum match the convention of Tesseract's osdetect.h
115 */
enum Orientation {
  ORIENTATION_PAGE_UP = 0,    ///< "Up" for the text is the top of the page.
  ORIENTATION_PAGE_RIGHT = 1, ///< "Up" for the text is to page right.
  ORIENTATION_PAGE_DOWN = 2,  ///< "Up" for the text is the page bottom.
  ORIENTATION_PAGE_LEFT = 3   ///< "Up" for the text is to page left.
};
122 
123 /**
124  * The grapheme clusters within a line of text are laid out logically
125  * in this direction, judged when looking at the text line rotated so that
126  * its Orientation is "page up".
127  *
128  * For English text, the writing direction is left-to-right.  For the
129  * Chinese text in the above example, the writing direction is top-to-bottom.
130  */
enum WritingDirection {
  /// Clusters run left-to-right, as in English text.
  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
  /// Clusters run right-to-left.
  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
  /// Clusters run top-to-bottom, as in the Chinese text of the orientation
  /// example.
  WRITING_DIRECTION_TOP_TO_BOTTOM = 2
};
136 
137 /**
138  * The text lines are read in the given sequence.
139  *
140  * In English, the order is top-to-bottom.
 * In Chinese, vertical text lines are read right-to-left.  Mongolian is
 * written in vertical columns top to bottom like Chinese, but the lines
 * are ordered left-to-right.
144  *
145  * Note that only some combinations make sense.  For example,
146  * WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
147  */
enum TextlineOrder {
  /// Lines follow one another left-to-right (e.g. Mongolian columns).
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
  /// Lines follow one another right-to-left (e.g. vertical Chinese lines).
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
  /// Lines follow one another top-to-bottom (e.g. English).
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2
};
153 
/**
 * Possible modes for page layout analysis. These *must* be kept in order
 * of decreasing amount of layout analysis to be done, except for OSD_ONLY,
 * so that the inequality tests in the inline PSM_* functions below work.
 */
enum PageSegMode {
  /// Orientation and script detection only.
  PSM_OSD_ONLY = 0,
  /// Automatic page segmentation with orientation and script detection. (OSD)
  PSM_AUTO_OSD = 1,
  /// Automatic page segmentation, but no OSD, or OCR.
  PSM_AUTO_ONLY = 2,
  /// Fully automatic page segmentation, but no OSD.
  PSM_AUTO = 3,
  /// Assume a single column of text of variable sizes.
  PSM_SINGLE_COLUMN = 4,
  /// Assume a single uniform block of vertically aligned text.
  PSM_SINGLE_BLOCK_VERT_TEXT = 5,
  /// Assume a single uniform block of text. (Default.)
  PSM_SINGLE_BLOCK = 6,
  /// Treat the image as a single text line.
  PSM_SINGLE_LINE = 7,
  /// Treat the image as a single word.
  PSM_SINGLE_WORD = 8,
  /// Treat the image as a single word in a circle.
  PSM_CIRCLE_WORD = 9,
  /// Treat the image as a single character.
  PSM_SINGLE_CHAR = 10,
  /// Find as much text as possible in no particular order.
  PSM_SPARSE_TEXT = 11,
  /// Sparse text with orientation and script det.
  PSM_SPARSE_TEXT_OSD = 12,
  /// Treat the image as a single text line, bypassing hacks that are
  /// Tesseract-specific.
  PSM_RAW_LINE = 13,

  /// Number of enum entries.
  PSM_COUNT
};
181 
182 /**
183  * Inline functions that act on a PageSegMode to determine whether components of
184  * layout analysis are enabled.
185  * *Depend critically on the order of elements of PageSegMode.*
186  * NOTE that arg is an int for compatibility with INT_PARAM.
187  */
PSM_OSD_ENABLED(int pageseg_mode)188 inline bool PSM_OSD_ENABLED(int pageseg_mode) {
189   return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD;
190 }
PSM_ORIENTATION_ENABLED(int pageseg_mode)191 inline bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
192   return pageseg_mode <= PSM_AUTO || pageseg_mode == PSM_SPARSE_TEXT_OSD;
193 }
PSM_COL_FIND_ENABLED(int pageseg_mode)194 inline bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
195   return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_AUTO;
196 }
PSM_SPARSE(int pageseg_mode)197 inline bool PSM_SPARSE(int pageseg_mode) {
198   return pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
199 }
PSM_BLOCK_FIND_ENABLED(int pageseg_mode)200 inline bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
201   return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_COLUMN;
202 }
PSM_LINE_FIND_ENABLED(int pageseg_mode)203 inline bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
204   return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_BLOCK;
205 }
PSM_WORD_FIND_ENABLED(int pageseg_mode)206 inline bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
207   return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) ||
208          pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
209 }
210 
211 /**
212  * enum of the elements of the page hierarchy, used in ResultIterator
213  * to provide functions that operate on each level without having to
214  * have 5x as many functions.
215  */
enum PageIteratorLevel {
  RIL_BLOCK = 0, ///< Block of text/image/separator line.
  RIL_PARA,      ///< Paragraph within a block.
  RIL_TEXTLINE,  ///< Line within a paragraph.
  RIL_WORD,      ///< Word within a textline.
  RIL_SYMBOL     ///< Symbol/character within a word.
};
223 
224 /**
225  * JUSTIFICATION_UNKNOWN
226  *   The alignment is not clearly one of the other options.  This could happen
227  *   for example if there are only one or two lines of text or the text looks
228  *   like source code or poetry.
229  *
230  * NOTA BENE: Fully justified paragraphs (text aligned to both left and right
231  *    margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
232  *    is written with a left-to-right script and with JUSTIFICATION_RIGHT if
233  *    their text is written in a right-to-left script.
234  *
235  * Interpretation for text read in vertical lines:
236  *   "Left" is wherever the starting reading position is.
237  *
238  * JUSTIFICATION_LEFT
239  *   Each line, except possibly the first, is flush to the same left tab stop.
240  *
241  * JUSTIFICATION_CENTER
 *   The text lines of the paragraph are centered about a line going
 *   down through the middle of the text lines.
244  *
245  * JUSTIFICATION_RIGHT
246  *   Each line, except possibly the first, is flush to the same right tab stop.
247  */
enum ParagraphJustification {
  /// The alignment is not clearly one of the other options.
  JUSTIFICATION_UNKNOWN = 0,
  /// Each line, except possibly the first, is flush to the same left tab stop.
  JUSTIFICATION_LEFT,
  /// The text lines of the paragraph are centered.
  JUSTIFICATION_CENTER,
  /// Each line, except possibly the first, is flush to the same right tab
  /// stop.
  JUSTIFICATION_RIGHT
};
254 
255 /**
 * When Tesseract is initialized we can choose to instantiate/load/run
 * only the Tesseract part, only the LSTM part, or both combined.
 * The preference of which engine to use is stored in tessedit_ocr_engine_mode.
259  *
260  * ATTENTION: When modifying this enum, please make sure to make the
261  * appropriate changes to all the enums mirroring it (e.g. OCREngine in
262  * cityblock/workflow/detection/detection_storage.proto). Such enums will
263  * mention the connection to OcrEngineMode in the comments.
264  */
enum OcrEngineMode {
  /// Run Tesseract only - fastest; deprecated
  OEM_TESSERACT_ONLY = 0,
  /// Run just the LSTM line recognizer.
  OEM_LSTM_ONLY,
  /// Run the LSTM recognizer, but allow fallback to Tesseract when things get
  /// difficult. deprecated
  OEM_TESSERACT_LSTM_COMBINED,
  /// Specify this mode when calling init_*(), to indicate that any of the
  /// above modes should be automatically inferred from the variables in the
  /// language-specific config, command-line configs, or if not specified in
  /// any of the above should be set to the default OEM_TESSERACT_ONLY.
  OEM_DEFAULT,
  /// Number of OEMs
  OEM_COUNT
};
280 
281 } // namespace tesseract.
282 
283 #endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_
284