1 /**********************************************************************
2 * File: tesseract.cpp
3 * Description: Main program for merge of tess and editor.
4 * Author: Ray Smith
5 *
6 * (C) Copyright 1992, Hewlett-Packard Ltd.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 **********************************************************************/
18
19 // Include automatically generated configuration file if running autoconf
20 #ifdef HAVE_CONFIG_H
21 # include "config_auto.h"
22 #endif
23
24 #include <cerrno> // for errno
25 #if defined(__USE_GNU)
26 # include <cfenv> // for feenableexcept
27 #endif
28 #include <climits> // for INT_MIN, INT_MAX
29 #include <cstdlib> // for std::getenv
30 #include <iostream>
31 #include <map> // for std::map
32 #include <memory> // std::unique_ptr
33
34 #include <allheaders.h>
35 #include <tesseract/baseapi.h>
36 #include "dict.h"
37 #if defined(USE_OPENCL)
38 # include "openclwrapper.h" // for OpenclDevice
39 #endif
40 #include <tesseract/renderer.h>
41 #include "simddetect.h"
42 #include "tprintf.h" // for tprintf
43
44 #ifdef _OPENMP
45 # include <omp.h>
46 #endif
47
48 #if defined(HAVE_LIBARCHIVE)
49 # include <archive.h>
50 #endif
51 #if defined(HAVE_LIBCURL)
52 # include <curl/curl.h>
53 #endif
54
55 #if defined(_WIN32)
56 # include <fcntl.h>
57 # include <io.h>
58 # if defined(HAVE_TIFFIO_H)
59
60 # include <tiffio.h>
61
// Error callback installed with TIFFSetErrorHandler() in main().
// Writes "<module>: <formatted message>.\n" to stderr; the module prefix
// is left out when module is a null pointer.
static void Win32ErrorHandler(const char *module, const char *fmt, va_list ap) {
  FILE *const out = stderr;
  if (module != nullptr) {
    fputs(module, out);
    fputs(": ", out);
  }
  vfprintf(out, fmt, ap);
  fputs(".\n", out);
}
69
// Warning callback installed with TIFFSetWarningHandler() in main().
// Writes "<module>: Warning, <formatted message>.\n" to stderr; the module
// prefix is left out when module is a null pointer.
static void Win32WarningHandler(const char *module, const char *fmt, va_list ap) {
  FILE *const out = stderr;
  if (module != nullptr) {
    fputs(module, out);
    fputs(": ", out);
  }
  fputs("Warning, ", out);
  vfprintf(out, fmt, ap);
  fputs(".\n", out);
}
78
79 # endif /* HAVE_TIFFIO_H */
80
81 class AutoWin32ConsoleOutputCP {
82 public:
AutoWin32ConsoleOutputCP(UINT codeCP)83 explicit AutoWin32ConsoleOutputCP(UINT codeCP) {
84 oldCP_ = GetConsoleOutputCP();
85 SetConsoleOutputCP(codeCP);
86 }
~AutoWin32ConsoleOutputCP()87 ~AutoWin32ConsoleOutputCP() {
88 SetConsoleOutputCP(oldCP_);
89 }
90
91 private:
92 UINT oldCP_;
93 };
94
95 static AutoWin32ConsoleOutputCP autoWin32ConsoleOutputCP(CP_UTF8);
96
97 #endif // _WIN32
98
99 using namespace tesseract;
100
PrintVersionInfo()101 static void PrintVersionInfo() {
102 char *versionStrP;
103
104 printf("tesseract %s\n", tesseract::TessBaseAPI::Version());
105
106 versionStrP = getLeptonicaVersion();
107 printf(" %s\n", versionStrP);
108 lept_free(versionStrP);
109
110 versionStrP = getImagelibVersions();
111 printf(" %s\n", versionStrP);
112 lept_free(versionStrP);
113
114 #ifdef USE_OPENCL
115 cl_platform_id platform[4];
116 cl_uint num_platforms;
117
118 printf(" OpenCL info:\n");
119 if (clGetPlatformIDs(4, platform, &num_platforms) == CL_SUCCESS) {
120 printf(" Found %u platform(s).\n", num_platforms);
121 for (unsigned n = 0; n < num_platforms; n++) {
122 char info[256];
123 if (clGetPlatformInfo(platform[n], CL_PLATFORM_NAME, 256, info, 0) == CL_SUCCESS) {
124 printf(" Platform %u name: %s.\n", n + 1, info);
125 }
126 if (clGetPlatformInfo(platform[n], CL_PLATFORM_VERSION, 256, info, 0) == CL_SUCCESS) {
127 printf(" Version: %s.\n", info);
128 }
129 cl_device_id devices[2];
130 cl_uint num_devices;
131 if (clGetDeviceIDs(platform[n], CL_DEVICE_TYPE_ALL, 2, devices, &num_devices) == CL_SUCCESS) {
132 printf(" Found %u device(s).\n", num_devices);
133 for (unsigned i = 0; i < num_devices; ++i) {
134 if (clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0) == CL_SUCCESS) {
135 printf(" Device %u name: %s.\n", i + 1, info);
136 }
137 }
138 }
139 }
140 }
141 #endif
142 #if defined(HAVE_NEON) || defined(__aarch64__)
143 if (tesseract::SIMDDetect::IsNEONAvailable())
144 printf(" Found NEON\n");
145 #else
146 if (tesseract::SIMDDetect::IsAVX512BWAvailable()) {
147 printf(" Found AVX512BW\n");
148 }
149 if (tesseract::SIMDDetect::IsAVX512FAvailable()) {
150 printf(" Found AVX512F\n");
151 }
152 if (tesseract::SIMDDetect::IsAVX2Available()) {
153 printf(" Found AVX2\n");
154 }
155 if (tesseract::SIMDDetect::IsAVXAvailable()) {
156 printf(" Found AVX\n");
157 }
158 if (tesseract::SIMDDetect::IsFMAAvailable()) {
159 printf(" Found FMA\n");
160 }
161 if (tesseract::SIMDDetect::IsSSEAvailable()) {
162 printf(" Found SSE4.1\n");
163 }
164 #endif
165 #ifdef _OPENMP
166 printf(" Found OpenMP %d\n", _OPENMP);
167 #endif
168 #if defined(HAVE_LIBARCHIVE)
169 # if ARCHIVE_VERSION_NUMBER >= 3002000
170 printf(" Found %s\n", archive_version_details());
171 # else
172 printf(" Found %s\n", archive_version_string());
173 # endif // ARCHIVE_VERSION_NUMBER
174 #endif // HAVE_LIBARCHIVE
175 #if defined(HAVE_LIBCURL)
176 printf(" Found %s\n", curl_version());
177 #endif
178 }
179
// Writes the list of page segmentation modes to stdout. Builds with
// DISABLED_LEGACY_ENGINE append a note that the OSD modes are disabled.
static void PrintHelpForPSM() {
  static const char kPsmHelp[] =
      "Page segmentation modes:\n"
      " 0 Orientation and script detection (OSD) only.\n"
      " 1 Automatic page segmentation with OSD.\n"
      " 2 Automatic page segmentation, but no OSD, or OCR. (not "
      "implemented)\n"
      " 3 Fully automatic page segmentation, but no OSD. (Default)\n"
      " 4 Assume a single column of text of variable sizes.\n"
      " 5 Assume a single uniform block of vertically aligned text.\n"
      " 6 Assume a single uniform block of text.\n"
      " 7 Treat the image as a single text line.\n"
      " 8 Treat the image as a single word.\n"
      " 9 Treat the image as a single word in a circle.\n"
      " 10 Treat the image as a single character.\n"
      " 11 Sparse text. Find as much text as possible in no"
      " particular order.\n"
      " 12 Sparse text with OSD.\n"
      " 13 Raw line. Treat the image as a single text line,\n"
      " bypassing hacks that are Tesseract-specific.\n";

  fputs(kPsmHelp, stdout);
#ifdef DISABLED_LEGACY_ENGINE
  fputs("\nNOTE: The OSD modes are currently disabled.\n", stdout);
#endif
}
208
209 #ifndef DISABLED_LEGACY_ENGINE
// Writes the list of OCR engine modes to stdout.
static void PrintHelpForOEM() {
  static const char kOemHelp[] =
      "OCR Engine modes:\n"
      " 0 Legacy engine only.\n"
      " 1 Neural nets LSTM engine only.\n"
      " 2 Legacy + LSTM engines.\n"
      " 3 Default, based on what is available.\n";

  fputs(kOemHelp, stdout);
}
220 #endif // ndef DISABLED_LEGACY_ENGINE
221
PrintHelpExtra(const char * program)222 static void PrintHelpExtra(const char *program) {
223 printf(
224 "Usage:\n"
225 " %s --help | --help-extra | --help-psm | "
226 #ifndef DISABLED_LEGACY_ENGINE
227 "--help-oem | "
228 #endif
229 "--version\n"
230 " %s --list-langs [--tessdata-dir PATH]\n"
231 #ifndef DISABLED_LEGACY_ENGINE
232 " %s --print-fonts-table [options...] [configfile...]\n"
233 #endif // ndef DISABLED_LEGACY_ENGINE
234 " %s --print-parameters [options...] [configfile...]\n"
235 " %s imagename|imagelist|stdin outputbase|stdout [options...] "
236 "[configfile...]\n"
237 "\n"
238 "OCR options:\n"
239 " --tessdata-dir PATH Specify the location of tessdata path.\n"
240 " --user-words PATH Specify the location of user words file.\n"
241 " --user-patterns PATH Specify the location of user patterns file.\n"
242 " --dpi VALUE Specify DPI for input image.\n"
243 " --loglevel LEVEL Specify logging level. LEVEL can be\n"
244 " ALL, TRACE, DEBUG, INFO, WARN, ERROR, FATAL or OFF.\n"
245 " -l LANG[+LANG] Specify language(s) used for OCR.\n"
246 " -c VAR=VALUE Set value for config variables.\n"
247 " Multiple -c arguments are allowed.\n"
248 " --psm NUM Specify page segmentation mode.\n"
249 #ifndef DISABLED_LEGACY_ENGINE
250 " --oem NUM Specify OCR Engine mode.\n"
251 #endif
252 "NOTE: These options must occur before any configfile.\n"
253 "\n",
254 program, program, program, program
255 #ifndef DISABLED_LEGACY_ENGINE
256 , program
257 #endif // ndef DISABLED_LEGACY_ENGINE
258 );
259
260 PrintHelpForPSM();
261 #ifndef DISABLED_LEGACY_ENGINE
262 printf("\n");
263 PrintHelpForOEM();
264 #endif
265
266 printf(
267 "\n"
268 "Single options:\n"
269 " -h, --help Show minimal help message.\n"
270 " --help-extra Show extra help for advanced users.\n"
271 " --help-psm Show page segmentation modes.\n"
272 #ifndef DISABLED_LEGACY_ENGINE
273 " --help-oem Show OCR Engine modes.\n"
274 #endif
275 " -v, --version Show version information.\n"
276 " --list-langs List available languages for tesseract engine.\n"
277 #ifndef DISABLED_LEGACY_ENGINE
278 " --print-fonts-table Print tesseract fonts table.\n"
279 #endif // ndef DISABLED_LEGACY_ENGINE
280 " --print-parameters Print tesseract parameters.\n");
281 }
282
PrintHelpMessage(const char * program)283 static void PrintHelpMessage(const char *program) {
284 printf(
285 "Usage:\n"
286 " %s --help | --help-extra | --version\n"
287 " %s --list-langs\n"
288 " %s imagename outputbase [options...] [configfile...]\n"
289 "\n"
290 "OCR options:\n"
291 " -l LANG[+LANG] Specify language(s) used for OCR.\n"
292 "NOTE: These options must occur before any configfile.\n"
293 "\n"
294 "Single options:\n"
295 " --help Show this help message.\n"
296 " --help-extra Show extra help for advanced users.\n"
297 " --version Show version information.\n"
298 " --list-langs List available languages for tesseract "
299 "engine.\n",
300 program, program, program);
301 }
302
SetVariablesFromCLArgs(tesseract::TessBaseAPI & api,int argc,char ** argv)303 static bool SetVariablesFromCLArgs(tesseract::TessBaseAPI &api, int argc, char **argv) {
304 bool success = true;
305 char opt1[256], opt2[255];
306 for (int i = 0; i < argc; i++) {
307 if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
308 strncpy(opt1, argv[i + 1], 255);
309 opt1[255] = '\0';
310 char *p = strchr(opt1, '=');
311 if (!p) {
312 fprintf(stderr, "Missing = in configvar assignment\n");
313 success = false;
314 break;
315 }
316 *p = 0;
317 strncpy(opt2, strchr(argv[i + 1], '=') + 1, sizeof(opt2) - 1);
318 opt2[254] = 0;
319 ++i;
320
321 if (!api.SetVariable(opt1, opt2)) {
322 fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2);
323 }
324 }
325 }
326 return success;
327 }
328
PrintLangsList(tesseract::TessBaseAPI & api)329 static void PrintLangsList(tesseract::TessBaseAPI &api) {
330 std::vector<std::string> languages;
331 api.GetAvailableLanguagesAsVector(&languages);
332 printf("List of available languages in \"%s\" (%zu):\n",
333 api.GetDatapath(), languages.size());
334 for (const auto &language : languages) {
335 printf("%s\n", language.c_str());
336 }
337 api.End();
338 }
339
340 /**
341 * We have 2 possible sources of pagesegmode: a config file and
342 * the command line. For backwards compatibility reasons, the
343 * default in tesseract is tesseract::PSM_SINGLE_BLOCK, but the
344 * default for this program is tesseract::PSM_AUTO. We will let
345 * the config file take priority, so the command-line default
346 * can take priority over the tesseract default, so we use the
347 * value from the command line only if the retrieved mode
348 * is still tesseract::PSM_SINGLE_BLOCK, indicating no change
349 * in any config file. Therefore the only way to force
350 * tesseract::PSM_SINGLE_BLOCK is from the command line.
351 * It would be simpler if we could set the value before Init,
352 * but that doesn't work.
353 */
FixPageSegMode(tesseract::TessBaseAPI & api,tesseract::PageSegMode pagesegmode)354 static void FixPageSegMode(tesseract::TessBaseAPI &api, tesseract::PageSegMode pagesegmode) {
355 if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK) {
356 api.SetPageSegMode(pagesegmode);
357 }
358 }
359
// Checks that arg is a valid value for the option named mode, i.e. that it
// lies in the range [0, count). Prints a message which names the allowed
// range to stdout and returns false if it does not.
static bool checkArgValues(int arg, const char *mode, int count) {
  const bool in_range = arg >= 0 && arg < count;
  if (!in_range) {
    printf("Invalid %s value, please enter a number between 0-%d\n", mode, count - 1);
  }
  return in_range;
}
367
368 // NOTE: arg_i is used here to avoid ugly *i so many times in this function
ParseArgs(int argc,char ** argv,const char ** lang,const char ** image,const char ** outputbase,const char ** datapath,l_int32 * dpi,bool * list_langs,bool * print_parameters,bool * print_fonts_table,std::vector<std::string> * vars_vec,std::vector<std::string> * vars_values,l_int32 * arg_i,tesseract::PageSegMode * pagesegmode,tesseract::OcrEngineMode * enginemode)369 static bool ParseArgs(int argc, char **argv, const char **lang, const char **image,
370 const char **outputbase, const char **datapath, l_int32 *dpi,
371 bool *list_langs, bool *print_parameters, bool* print_fonts_table, std::vector<std::string> *vars_vec,
372 std::vector<std::string> *vars_values, l_int32 *arg_i,
373 tesseract::PageSegMode *pagesegmode, tesseract::OcrEngineMode *enginemode) {
374 bool noocr = false;
375 int i;
376 for (i = 1; i < argc && (*outputbase == nullptr || argv[i][0] == '-'); i++) {
377 if (*image != nullptr && *outputbase == nullptr) {
378 // outputbase follows image, don't allow options at that position.
379 *outputbase = argv[i];
380 } else if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
381 PrintHelpMessage(argv[0]);
382 noocr = true;
383 } else if (strcmp(argv[i], "--help-extra") == 0) {
384 PrintHelpExtra(argv[0]);
385 noocr = true;
386 } else if ((strcmp(argv[i], "--help-psm") == 0)) {
387 PrintHelpForPSM();
388 noocr = true;
389 #ifndef DISABLED_LEGACY_ENGINE
390 } else if ((strcmp(argv[i], "--help-oem") == 0)) {
391 PrintHelpForOEM();
392 noocr = true;
393 #endif
394 } else if ((strcmp(argv[i], "-v") == 0) || (strcmp(argv[i], "--version") == 0)) {
395 PrintVersionInfo();
396 noocr = true;
397 } else if (strcmp(argv[i], "-l") == 0 && i + 1 < argc) {
398 *lang = argv[i + 1];
399 ++i;
400 } else if (strcmp(argv[i], "--tessdata-dir") == 0 && i + 1 < argc) {
401 *datapath = argv[i + 1];
402 ++i;
403 } else if (strcmp(argv[i], "--dpi") == 0 && i + 1 < argc) {
404 *dpi = atoi(argv[i + 1]);
405 ++i;
406 } else if (strcmp(argv[i], "--loglevel") == 0 && i + 1 < argc) {
407 // Allow the log levels which are used by log4cxx.
408 const std::string loglevel_string = argv[++i];
409 static const std::map<const std::string, int> loglevels {
410 {"ALL", INT_MIN},
411 {"TRACE", 5000},
412 {"DEBUG", 10000},
413 {"INFO", 20000},
414 {"WARN", 30000},
415 {"ERROR", 40000},
416 {"FATAL", 50000},
417 {"OFF", INT_MAX},
418 };
419 try {
420 auto loglevel = loglevels.at(loglevel_string);
421 log_level = loglevel;
422 } catch(const std::out_of_range& e) {
423 // TODO: Allow numeric argument?
424 tprintf("Error, unsupported --loglevel %s\n", loglevel_string.c_str());
425 return false;
426 }
427 } else if (strcmp(argv[i], "--user-words") == 0 && i + 1 < argc) {
428 vars_vec->push_back("user_words_file");
429 vars_values->push_back(argv[i + 1]);
430 ++i;
431 } else if (strcmp(argv[i], "--user-patterns") == 0 && i + 1 < argc) {
432 vars_vec->push_back("user_patterns_file");
433 vars_values->push_back(argv[i + 1]);
434 ++i;
435 } else if (strcmp(argv[i], "--list-langs") == 0) {
436 noocr = true;
437 *list_langs = true;
438 } else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) {
439 if (!checkArgValues(atoi(argv[i + 1]), "PSM", tesseract::PSM_COUNT)) {
440 return false;
441 }
442 *pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
443 ++i;
444 } else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) {
445 #ifndef DISABLED_LEGACY_ENGINE
446 int oem = atoi(argv[i + 1]);
447 if (!checkArgValues(oem, "OEM", tesseract::OEM_COUNT)) {
448 return false;
449 }
450 *enginemode = static_cast<tesseract::OcrEngineMode>(oem);
451 #endif
452 ++i;
453 } else if (strcmp(argv[i], "--print-parameters") == 0) {
454 noocr = true;
455 *print_parameters = true;
456 #ifndef DISABLED_LEGACY_ENGINE
457 } else if (strcmp(argv[i], "--print-fonts-table") == 0) {
458 noocr = true;
459 *print_fonts_table = true;
460 #endif // ndef DISABLED_LEGACY_ENGINE
461 } else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
462 // handled properly after api init
463 ++i;
464 } else if (*image == nullptr) {
465 *image = argv[i];
466 } else {
467 // Unexpected argument.
468 fprintf(stderr, "Error, unknown command line argument '%s'\n", argv[i]);
469 return false;
470 }
471 }
472
473 *arg_i = i;
474
475 if (*pagesegmode == tesseract::PSM_OSD_ONLY) {
476 // OSD = orientation and script detection.
477 if (*lang != nullptr && strcmp(*lang, "osd")) {
478 // If the user explicitly specifies a language (other than osd)
479 // or a script, only orientation can be detected.
480 fprintf(stderr, "Warning, detects only orientation with -l %s\n", *lang);
481 } else {
482 // That mode requires osd.traineddata to detect orientation and script.
483 *lang = "osd";
484 }
485 }
486
487 if (*outputbase == nullptr && noocr == false) {
488 PrintHelpMessage(argv[0]);
489 return false;
490 }
491
492 return true;
493 }
494
PreloadRenderers(tesseract::TessBaseAPI & api,std::vector<std::unique_ptr<TessResultRenderer>> & renderers,tesseract::PageSegMode pagesegmode,const char * outputbase)495 static void PreloadRenderers(tesseract::TessBaseAPI &api,
496 std::vector<std::unique_ptr<TessResultRenderer>> &renderers,
497 tesseract::PageSegMode pagesegmode, const char *outputbase) {
498 if (pagesegmode == tesseract::PSM_OSD_ONLY) {
499 #ifndef DISABLED_LEGACY_ENGINE
500 renderers.push_back(std::make_unique<tesseract::TessOsdRenderer>(outputbase));
501 #endif // ndef DISABLED_LEGACY_ENGINE
502 } else {
503 bool error = false;
504 bool b;
505 api.GetBoolVariable("tessedit_create_hocr", &b);
506 if (b) {
507 bool font_info;
508 api.GetBoolVariable("hocr_font_info", &font_info);
509 auto renderer = std::make_unique<tesseract::TessHOcrRenderer>(outputbase, font_info);
510 if (renderer->happy()) {
511 renderers.push_back(std::move(renderer));
512 } else {
513 tprintf("Error, could not create hOCR output file: %s\n", strerror(errno));
514 error = true;
515 }
516 }
517
518 api.GetBoolVariable("tessedit_create_alto", &b);
519 if (b) {
520 auto renderer = std::make_unique<tesseract::TessAltoRenderer>(outputbase);
521 if (renderer->happy()) {
522 renderers.push_back(std::move(renderer));
523 } else {
524 tprintf("Error, could not create ALTO output file: %s\n", strerror(errno));
525 error = true;
526 }
527 }
528
529 api.GetBoolVariable("tessedit_create_tsv", &b);
530 if (b) {
531 bool font_info;
532 api.GetBoolVariable("hocr_font_info", &font_info);
533 auto renderer = std::make_unique<tesseract::TessTsvRenderer>(outputbase, font_info);
534 if (renderer->happy()) {
535 renderers.push_back(std::move(renderer));
536 } else {
537 tprintf("Error, could not create TSV output file: %s\n", strerror(errno));
538 error = true;
539 }
540 }
541
542 api.GetBoolVariable("tessedit_create_pdf", &b);
543 if (b) {
544 #ifdef WIN32
545 if (_setmode(_fileno(stdout), _O_BINARY) == -1)
546 tprintf("ERROR: cin to binary: %s", strerror(errno));
547 #endif // WIN32
548 bool textonly;
549 api.GetBoolVariable("textonly_pdf", &textonly);
550 auto renderer = std::make_unique<tesseract::TessPDFRenderer>(outputbase, api.GetDatapath(), textonly);
551 if (renderer->happy()) {
552 renderers.push_back(std::move(renderer));
553 } else {
554 tprintf("Error, could not create PDF output file: %s\n", strerror(errno));
555 error = true;
556 }
557 }
558
559 api.GetBoolVariable("tessedit_write_unlv", &b);
560 if (b) {
561 api.SetVariable("unlv_tilde_crunching", "true");
562 auto renderer = std::make_unique<tesseract::TessUnlvRenderer>(outputbase);
563 if (renderer->happy()) {
564 renderers.push_back(std::move(renderer));
565 } else {
566 tprintf("Error, could not create UNLV output file: %s\n", strerror(errno));
567 error = true;
568 }
569 }
570
571 api.GetBoolVariable("tessedit_create_lstmbox", &b);
572 if (b) {
573 auto renderer = std::make_unique<tesseract::TessLSTMBoxRenderer>(outputbase);
574 if (renderer->happy()) {
575 renderers.push_back(std::move(renderer));
576 } else {
577 tprintf("Error, could not create LSTM BOX output file: %s\n", strerror(errno));
578 error = true;
579 }
580 }
581
582 api.GetBoolVariable("tessedit_create_boxfile", &b);
583 if (b) {
584 auto renderer = std::make_unique<tesseract::TessBoxTextRenderer>(outputbase);
585 if (renderer->happy()) {
586 renderers.push_back(std::move(renderer));
587 } else {
588 tprintf("Error, could not create BOX output file: %s\n", strerror(errno));
589 error = true;
590 }
591 }
592
593 api.GetBoolVariable("tessedit_create_wordstrbox", &b);
594 if (b) {
595 auto renderer = std::make_unique<tesseract::TessWordStrBoxRenderer>(outputbase);
596 if (renderer->happy()) {
597 renderers.push_back(std::move(renderer));
598 } else {
599 tprintf("Error, could not create WordStr BOX output file: %s\n", strerror(errno));
600 error = true;
601 }
602 }
603
604 api.GetBoolVariable("tessedit_create_txt", &b);
605 if (b || (!error && renderers.empty())) {
606 // Create text output if no other output was requested
607 // even if text output was not explicitly requested unless
608 // there was an error.
609 auto renderer = std::make_unique<tesseract::TessTextRenderer>(outputbase);
610 if (renderer->happy()) {
611 renderers.push_back(std::move(renderer));
612 } else {
613 tprintf("Error, could not create TXT output file: %s\n", strerror(errno));
614 }
615 }
616 }
617
618 // Null-out the renderers that are
619 // added to the root, and leave the root in the vector.
620 for (size_t r = 1; r < renderers.size(); ++r) {
621 renderers[0]->insert(renderers[r].get());
622 renderers[r].release(); // at the moment insert() is owning
623 }
624 }
625
626 /**********************************************************************
627 * main()
628 *
629 **********************************************************************/
630
main(int argc,char ** argv)631 int main(int argc, char **argv) {
632 #if defined(__USE_GNU)
633 // Raise SIGFPE.
634 # if defined(__clang__)
635 // clang creates code which causes some FP exceptions, so don't enable those.
636 feenableexcept(FE_DIVBYZERO);
637 # else
638 feenableexcept(FE_DIVBYZERO | FE_OVERFLOW | FE_INVALID);
639 # endif
640 #endif
641 const char *lang = nullptr;
642 const char *image = nullptr;
643 const char *outputbase = nullptr;
644 const char *datapath = nullptr;
645 bool list_langs = false;
646 bool print_parameters = false;
647 bool print_fonts_table = false;
648 l_int32 dpi = 0;
649 int arg_i = 1;
650 tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
651 #ifdef DISABLED_LEGACY_ENGINE
652 auto enginemode = tesseract::OEM_LSTM_ONLY;
653 #else
654 tesseract::OcrEngineMode enginemode = tesseract::OEM_DEFAULT;
655 #endif
656 std::vector<std::string> vars_vec;
657 std::vector<std::string> vars_values;
658
659 if (std::getenv("LEPT_MSG_SEVERITY")) {
660 // Get Leptonica message level from environment variable.
661 setMsgSeverity(L_SEVERITY_EXTERNAL);
662 } else {
663 // Disable debugging and informational messages from Leptonica.
664 setMsgSeverity(L_SEVERITY_ERROR);
665 }
666
667 #if defined(HAVE_TIFFIO_H) && defined(_WIN32)
668 /* Show libtiff errors and warnings on console (not in GUI). */
669 TIFFSetErrorHandler(Win32ErrorHandler);
670 TIFFSetWarningHandler(Win32WarningHandler);
671 #endif // HAVE_TIFFIO_H && _WIN32
672
673 if (!ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &dpi, &list_langs,
674 &print_parameters, &print_fonts_table, &vars_vec, &vars_values, &arg_i, &pagesegmode, &enginemode)) {
675 return EXIT_FAILURE;
676 }
677
678 bool in_recognition_mode = !list_langs && !print_parameters && !print_fonts_table;
679
680 if (lang == nullptr && in_recognition_mode) {
681 // Set default language model if none was given and a model file is needed.
682 lang = "eng";
683 }
684
685 if (image == nullptr && in_recognition_mode) {
686 return EXIT_SUCCESS;
687 }
688
689 // Call GlobalDawgCache here to create the global DawgCache object before
690 // the TessBaseAPI object. This fixes the order of destructor calls:
691 // first TessBaseAPI must be destructed, DawgCache must be the last object.
692 tesseract::Dict::GlobalDawgCache();
693
694 TessBaseAPI api;
695
696 api.SetOutputName(outputbase);
697
698 const int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]), argc - arg_i,
699 &vars_vec, &vars_values, false);
700
701 if (!SetVariablesFromCLArgs(api, argc, argv)) {
702 return EXIT_FAILURE;
703 }
704
705 // SIMD settings might be overridden by config variable.
706 tesseract::SIMDDetect::Update();
707
708 if (list_langs) {
709 PrintLangsList(api);
710 return EXIT_SUCCESS;
711 }
712
713 if (init_failed) {
714 fprintf(stderr, "Could not initialize tesseract.\n");
715 return EXIT_FAILURE;
716 }
717
718 if (print_parameters) {
719 FILE *fout = stdout;
720 fprintf(stdout, "Tesseract parameters:\n");
721 api.PrintVariables(fout);
722 api.End();
723 return EXIT_SUCCESS;
724 }
725
726 #ifndef DISABLED_LEGACY_ENGINE
727 if (print_fonts_table) {
728 FILE* fout = stdout;
729 fprintf(stdout, "Tesseract fonts table:\n");
730 api.PrintFontsTable(fout);
731 api.End();
732 return EXIT_SUCCESS;
733 }
734 #endif // ndef DISABLED_LEGACY_ENGINE
735
736 FixPageSegMode(api, pagesegmode);
737
738 if (dpi) {
739 auto dpi_string = std::to_string(dpi);
740 api.SetVariable("user_defined_dpi", dpi_string.c_str());
741 }
742
743 int ret_val = EXIT_SUCCESS;
744
745 if (pagesegmode == tesseract::PSM_AUTO_ONLY) {
746 Pix *pixs = pixRead(image);
747 if (!pixs) {
748 fprintf(stderr, "Leptonica can't process input file: %s\n", image);
749 return 2;
750 }
751
752 api.SetImage(pixs);
753
754 tesseract::Orientation orientation;
755 tesseract::WritingDirection direction;
756 tesseract::TextlineOrder order;
757 float deskew_angle;
758
759 const std::unique_ptr<const tesseract::PageIterator> it(api.AnalyseLayout());
760 if (it) {
761 // TODO: Implement output of page segmentation, see documentation
762 // ("Automatic page segmentation, but no OSD, or OCR").
763 it->Orientation(&orientation, &direction, &order, &deskew_angle);
764 tprintf(
765 "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
766 "Deskew angle: %.4f\n",
767 orientation, direction, order, deskew_angle);
768 } else {
769 ret_val = EXIT_FAILURE;
770 }
771
772 pixDestroy(&pixs);
773 return ret_val;
774 }
775
776 // Set in_training_mode to true when using one of these configs:
777 // ambigs.train, box.train, box.train.stderr, linebox, rebox, lstm.train.
778 // In this mode no other OCR result files are written.
779 bool b = false;
780 bool in_training_mode = (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
781 (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
782 (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b) ||
783 (api.GetBoolVariable("tessedit_train_line_recognizer", &b) && b);
784
785 #ifdef DISABLED_LEGACY_ENGINE
786 auto cur_psm = api.GetPageSegMode();
787 auto osd_warning = std::string("");
788 if (cur_psm == tesseract::PSM_OSD_ONLY) {
789 const char *disabled_osd_msg =
790 "\nERROR: The page segmentation mode 0 (OSD Only) is currently "
791 "disabled.\n\n";
792 fprintf(stderr, "%s", disabled_osd_msg);
793 return EXIT_FAILURE;
794 } else if (cur_psm == tesseract::PSM_AUTO_OSD) {
795 api.SetPageSegMode(tesseract::PSM_AUTO);
796 osd_warning +=
797 "\nWarning: The page segmentation mode 1 (Auto+OSD) is currently "
798 "disabled. "
799 "Using PSM 3 (Auto) instead.\n\n";
800 } else if (cur_psm == tesseract::PSM_SPARSE_TEXT_OSD) {
801 api.SetPageSegMode(tesseract::PSM_SPARSE_TEXT);
802 osd_warning +=
803 "\nWarning: The page segmentation mode 12 (Sparse text + OSD) is "
804 "currently disabled. "
805 "Using PSM 11 (Sparse text) instead.\n\n";
806 }
807 #endif // def DISABLED_LEGACY_ENGINE
808
809 std::vector<std::unique_ptr<TessResultRenderer>> renderers;
810
811 if (in_training_mode) {
812 renderers.push_back(nullptr);
813 } else if (outputbase != nullptr) {
814 PreloadRenderers(api, renderers, pagesegmode, outputbase);
815 }
816
817 if (!renderers.empty()) {
818 #ifdef DISABLED_LEGACY_ENGINE
819 if (!osd_warning.empty()) {
820 fprintf(stderr, "%s", osd_warning.c_str());
821 }
822 #endif
823 bool succeed = api.ProcessPages(image, nullptr, 0, renderers[0].get());
824 if (!succeed) {
825 fprintf(stderr, "Error during processing.\n");
826 ret_val = EXIT_FAILURE;
827 }
828 }
829
830 return ret_val;
831 }
832