1 ///////////////////////////////////////////////////////////////////////
2 // File: publictypes.h
3 // Description: Types used in both the API and internally
4 // Author: Ray Smith
5 //
6 // (C) Copyright 2010, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 ///////////////////////////////////////////////////////////////////////
18
19 #ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
20 #define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
21
22 namespace tesseract {
23
24 // This file contains types that are used both by the API and internally
25 // to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
26 // dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
27 // Restated: It is OK for low-level Tesseract files to include publictypes.h,
28 // but not for the low-level tesseract code to include top-level API code.
29 // This file should not use other Tesseract types, as that would drag
30 // their includes into the API-level.
31
/**
 * Printers' points per inch. This is the unit of the pointsize return.
 */
constexpr int kPointsPerInch{72};

/**
 * Smallest resolution that is considered believable. It doubles as the
 * default when no other information is available, because under-estimating
 * is safer than over-estimating.
 */
constexpr int kMinCredibleResolution{70};

/**
 * Largest resolution that is considered believable.
 */
constexpr int kMaxCredibleResolution{2400};

/**
 * Ratio between the median blob size and the likely resolution. Used to
 * estimate the resolution when none is provided; it is basically
 * 1 / (usual text size in inches).
 */
constexpr int kResolutionEstimationFactor{10};
46
/**
 * Possible types for a POLY_BLOCK or ColPartition.
 * Must be kept in sync with kPBColors in polyblk.cpp and the PTIs*Type
 * functions below, as well as kPolyBlockNames in layout_test.cc.
 * Used extensively by ColPartition, and POLY_BLOCK.
 */
enum PolyBlockType {
  PT_UNKNOWN,          // Type is not yet known. Keep as the first element.
  PT_FLOWING_TEXT,     // Text that lives inside a column.
  PT_HEADING_TEXT,     // Text that spans more than one column.
  PT_PULLOUT_TEXT,     // Text that is in a cross-column pull-out region.
  PT_EQUATION,         // Partition belonging to an equation region.
  PT_INLINE_EQUATION,  // Partition has inline equation.
  PT_TABLE,            // Partition belonging to a table region.
  PT_VERTICAL_TEXT,    // Text-line runs vertically.
  PT_CAPTION_TEXT,     // Text that belongs to an image.
  PT_FLOWING_IMAGE,    // Image that lives inside a column.
  PT_HEADING_IMAGE,    // Image that spans more than one column.
  PT_PULLOUT_IMAGE,    // Image that is in a cross-column pull-out region.
  PT_HORZ_LINE,        // Horizontal Line.
  PT_VERT_LINE,        // Vertical Line.
  PT_NOISE,            // Lies outside of any column.
  PT_COUNT             // Number of types above. Keep as the last element.
};

// The predicates below are constexpr (and therefore implicitly inline) so
// that they can be used in constant expressions as well as at run time.

/**
 * Returns true if PolyBlockType is of line type, i.e. either a horizontal
 * line (PT_HORZ_LINE) or a vertical line (PT_VERT_LINE).
 */
constexpr bool PTIsLineType(PolyBlockType type) {
  return type == PT_HORZ_LINE || type == PT_VERT_LINE;
}

/**
 * Returns true if PolyBlockType is of image type: PT_FLOWING_IMAGE,
 * PT_HEADING_IMAGE or PT_PULLOUT_IMAGE.
 */
constexpr bool PTIsImageType(PolyBlockType type) {
  return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE ||
         type == PT_PULLOUT_IMAGE;
}

/**
 * Returns true if PolyBlockType is of text type: flowing, heading, pull-out,
 * vertical or caption text, a table, or an inline equation.
 * Note that PT_EQUATION is NOT counted as text here.
 */
constexpr bool PTIsTextType(PolyBlockType type) {
  return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
         type == PT_PULLOUT_TEXT || type == PT_TABLE ||
         type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT ||
         type == PT_INLINE_EQUATION;
}

/**
 * Returns true if PolyBlockType is of pullout (inter-column) type:
 * PT_PULLOUT_IMAGE or PT_PULLOUT_TEXT.
 */
constexpr bool PTIsPulloutType(PolyBlockType type) {
  return type == PT_PULLOUT_IMAGE || type == PT_PULLOUT_TEXT;
}
92
/**
 *  +------------------+  Orientation Example:
 *  | 1 Aaaa Aaaa Aaaa |  ====================
 *  | Aaa aa aaa aa    |  To left is a diagram of some (1) English and
 *  | aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
 *  |                2 |
 *  |   #######  c c C |  Upright Latin characters are represented as A and a.
 *  |   #######  c c c |  '<' represents a latin character rotated
 *  | < #######  c c c |      anti-clockwise 90 degrees.
 *  | < #######  c   c |
 *  | < #######  .   c |  Upright Chinese characters are represented C and c.
 *  | 3 #######      c |
 *  +------------------+  NOTA BENE: enum values here should match goodoc.proto
 *
 *  If you orient your head so that "up" aligns with Orientation,
 *  then the characters will appear "right side up" and readable.
 *
 *  In the example above, both the English and Chinese paragraphs are oriented
 *  so their "up" is the top of the page (page up).  The photo credit is read
 *  with one's head turned leftward ("up" is to page left).
 *
 *  The values of this enum match the convention of Tesseract's osdetect.h
 */
enum Orientation {
  /** "Up" for the characters is the top of the page. */
  ORIENTATION_PAGE_UP = 0,
  /** "Up" for the characters is to the right of the page. */
  ORIENTATION_PAGE_RIGHT = 1,
  /** "Up" for the characters is the bottom of the page. */
  ORIENTATION_PAGE_DOWN = 2,
  /** "Up" for the characters is to the left of the page. */
  ORIENTATION_PAGE_LEFT = 3,
};
122
/**
 * Direction in which the grapheme clusters within a line of text are
 * logically laid out, judged when looking at the text line rotated so that
 * its Orientation is "page up".
 *
 * English text is written left-to-right. The Chinese text in the Orientation
 * example above is written top-to-bottom.
 */
enum WritingDirection {
  /** Clusters advance from left to right (e.g. English). */
  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
  /** Clusters advance from right to left. */
  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
  /** Clusters advance from top to bottom (e.g. vertical Chinese). */
  WRITING_DIRECTION_TOP_TO_BOTTOM = 2,
};
136
/**
 * Sequence in which the text lines are read.
 *
 * In English, the order is top-to-bottom.
 * In Chinese, vertical text lines are read right-to-left. Mongolian is
 * written in vertical columns top to bottom like Chinese, but the lines
 * are ordered left-to-right.
 *
 * Note that only some combinations make sense. For example,
 * WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
 */
enum TextlineOrder {
  /** Successive lines follow one another from left to right. */
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
  /** Successive lines follow one another from right to left. */
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
  /** Successive lines follow one another from top to bottom. */
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2,
};
153
/**
 * Possible modes for page layout analysis. These *must* be kept in order
 * of decreasing amount of layout analysis to be done, except for OSD_ONLY,
 * so that the inequality tests in the PSM_*_ENABLED functions below work.
 */
enum PageSegMode {
  PSM_OSD_ONLY = 0,       ///< Orientation and script detection only.
  PSM_AUTO_OSD = 1,       ///< Automatic page segmentation with orientation and
                          ///< script detection. (OSD)
  PSM_AUTO_ONLY = 2,      ///< Automatic page segmentation, but no OSD, or OCR.
  PSM_AUTO = 3,           ///< Fully automatic page segmentation, but no OSD.
  PSM_SINGLE_COLUMN = 4,  ///< Assume a single column of text of variable sizes.
  PSM_SINGLE_BLOCK_VERT_TEXT = 5,  ///< Assume a single uniform block of
                                   ///< vertically aligned text.
  PSM_SINGLE_BLOCK = 6,  ///< Assume a single uniform block of text. (Default.)
  PSM_SINGLE_LINE = 7,   ///< Treat the image as a single text line.
  PSM_SINGLE_WORD = 8,   ///< Treat the image as a single word.
  PSM_CIRCLE_WORD = 9,   ///< Treat the image as a single word in a circle.
  PSM_SINGLE_CHAR = 10,  ///< Treat the image as a single character.
  PSM_SPARSE_TEXT =
      11,  ///< Find as much text as possible in no particular order.
  PSM_SPARSE_TEXT_OSD = 12,  ///< Sparse text with orientation and script det.
  PSM_RAW_LINE = 13,  ///< Treat the image as a single text line, bypassing
                      ///< hacks that are Tesseract-specific.

  PSM_COUNT  ///< Number of enum entries.
};

/**
 * Functions that act on a PageSegMode to determine whether components of
 * layout analysis are enabled.
 * *Depend critically on the order of elements of PageSegMode.*
 * NOTE that arg is an int for compatibility with INT_PARAM.
 *
 * They are constexpr (and therefore implicitly inline) so that they can be
 * evaluated at compile time as well as at run time. None of them validates
 * that pageseg_mode is an actual PageSegMode value.
 */

/**
 * True for PSM_OSD_ONLY, PSM_AUTO_OSD and PSM_SPARSE_TEXT_OSD. The range
 * test has no lower bound, so any value <= PSM_AUTO_OSD also yields true.
 */
constexpr bool PSM_OSD_ENABLED(int pageseg_mode) {
  return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/**
 * True for every mode up to and including PSM_AUTO, and for
 * PSM_SPARSE_TEXT_OSD. The range test has no lower bound.
 */
constexpr bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
  return pageseg_mode <= PSM_AUTO || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/** True for the modes PSM_AUTO_OSD through PSM_AUTO inclusive. */
constexpr bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_AUTO;
}

/** True for the two sparse modes: PSM_SPARSE_TEXT and PSM_SPARSE_TEXT_OSD. */
constexpr bool PSM_SPARSE(int pageseg_mode) {
  return pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/** True for the modes PSM_AUTO_OSD through PSM_SINGLE_COLUMN inclusive. */
constexpr bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_COLUMN;
}

/** True for the modes PSM_AUTO_OSD through PSM_SINGLE_BLOCK inclusive. */
constexpr bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_BLOCK;
}

/**
 * True for the modes PSM_AUTO_OSD through PSM_SINGLE_LINE inclusive, and for
 * the two sparse modes (PSM_SPARSE_TEXT, PSM_SPARSE_TEXT_OSD).
 */
constexpr bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
  return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) ||
         pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}
210
/**
 * Elements of the page hierarchy, listed from the outermost (block) to the
 * innermost (symbol). Used in ResultIterator to provide functions that
 * operate on each level without having to have 5x as many functions.
 */
enum PageIteratorLevel {
  /** Block of text/image/separator line. */
  RIL_BLOCK,
  /** Paragraph within a block. */
  RIL_PARA,
  /** Line within a paragraph. */
  RIL_TEXTLINE,
  /** Word within a textline. */
  RIL_WORD,
  /** Symbol/character within a word. */
  RIL_SYMBOL
};
223
/**
 * How the text lines of a paragraph are aligned.
 *
 * NOTA BENE: Fully justified paragraphs (text aligned to both left and right
 * margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
 * is written with a left-to-right script and with JUSTIFICATION_RIGHT if
 * their text is written in a right-to-left script.
 *
 * Interpretation for text read in vertical lines:
 *   "Left" is wherever the starting reading position is.
 */
enum ParagraphJustification {
  /**
   * The alignment is not clearly one of the other options. This could happen
   * for example if there are only one or two lines of text or the text looks
   * like source code or poetry.
   */
  JUSTIFICATION_UNKNOWN,
  /**
   * Each line, except possibly the first, is flush to the same left tab stop.
   */
  JUSTIFICATION_LEFT,
  /**
   * The text lines of the paragraph are centered about a line running down
   * through the middle of the text lines.
   */
  JUSTIFICATION_CENTER,
  /**
   * Each line, except possibly the first, is flush to the same right tab
   * stop.
   */
  JUSTIFICATION_RIGHT,
};
254
/**
 * When Tesseract/Cube is initialized we can choose to instantiate/load/run
 * only the Tesseract part, only the Cube part or both along with the combiner.
 * The preference of which engine to use is stored in tessedit_ocr_engine_mode.
 *
 * NOTE(review): the paragraph above speaks of "Cube", while the enumerators
 * below refer to the LSTM recognizer - confirm whether the wording is stale.
 *
 * ATTENTION: When modifying this enum, please make sure to make the
 * appropriate changes to all the enums mirroring it (e.g. OCREngine in
 * cityblock/workflow/detection/detection_storage.proto). Such enums will
 * mention the connection to OcrEngineMode in the comments.
 */
enum OcrEngineMode {
  /** Run Tesseract only - fastest; deprecated */
  OEM_TESSERACT_ONLY,
  /** Run just the LSTM line recognizer. */
  OEM_LSTM_ONLY,
  /**
   * Run the LSTM recognizer, but allow fallback to Tesseract when things get
   * difficult. deprecated
   */
  OEM_TESSERACT_LSTM_COMBINED,
  /**
   * Specify this mode when calling init_*(), to indicate that any of the
   * above modes should be automatically inferred from the variables in the
   * language-specific config, command-line configs, or if not specified in
   * any of the above should be set to the default OEM_TESSERACT_ONLY.
   */
  OEM_DEFAULT,
  /** Number of OEMs */
  OEM_COUNT
};
280
281 } // namespace tesseract.
282
283 #endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_
284