1"""
2Wrapper for various OCR tools.
3
4USAGE:
from PIL import Image
import sys
from pyocr import builders
from pyocr import pyocr

tools = pyocr.get_available_tools()[:]
if len(tools) == 0:
    print("No OCR tool found")
    sys.exit(1)
print("Using '%s'" % (tools[0].get_name()))
tools[0].image_to_string(Image.open('test.png'), lang='fra',
                         builder=builders.TextBuilder())
16
17
18DETAILS:
19Each module wrapping an OCR tool provides the following functions:
20- get_name(): Return the name of the tool
- is_available(): Return True if the tool is installed, False otherwise.
22- get_version(): Return a tuple containing the version of the tool (if
23  installed)
24- get_available_builders(): Returns a list of builders that can be used with
25  this tool (see image_to_string())
- get_available_languages(): Return a list of languages supported by this
  tool. Languages are usually written as three-letter ISO language codes
  (e.g. 'fra')
28- image_to_string():
29    Takes 3 arguments:
30    - an image (see python Imaging "Image" module) (mandatory)
31    - lang=<language> (see get_available_languages()) (optional)
32    - builder=<builder> (see get_available_builders() or the classes in the
33      module 'pyocr.builders') (optional: default is
34      pyocr.builders.TextBuilder)
    The returned value depends on the specified builder.
36
37
38COPYRIGHT:
39Pyocr is released under the GPL v3.
40Copyright (c) Jerome Flesch, 2011-2016
41Tesseract module: Copyright (c) Samuel Hoffstaetter, 2009
42
43WEBSITE:
44https://gitlab.gnome.org/World/OpenPaperwork/pyocr#readme
45"""
46
47from . import _version
48from . import cuneiform
49from . import libtesseract
50from . import tesseract
51
# Names exported by a star-import of this module.
__all__ = ['get_available_tools', 'TOOLS', 'VERSION']
57
58
# Modules wrapping the supported OCR tools, listed in preference order
# (most preferred first).
TOOLS = [tesseract, libtesseract, cuneiform]
64
# Parse the package version string into the VERSION tuple.
#
# The raw value is captured once, before any parsing, so that the failure
# handler below never has to read `_version.version` again: if that attribute
# is missing or broken, the handler must still reach the (0, 0, 0) fallback
# instead of raising a second exception and aborting the import.
_raw_version = None
try:
    _raw_version = _version.version
    # drop Git commit (everything after the first "-")
    VERSION = _raw_version.split("-", 1)[0]
    # split major, minor, update
    VERSION = VERSION.split(".")
    # ensure always at least 3 elements
    # NOTE(review): parsed components are strings while the padding values
    # (and the fallback tuple below) are ints; left unchanged on purpose so
    # existing callers keep seeing the same values.
    VERSION = VERSION + ([0] * (3 - len(VERSION)))
    # seal it
    VERSION = tuple(VERSION)
except Exception as exc:
    # Best effort: a bad version string must never prevent the module from
    # being imported, so report the problem and fall back to a dummy version.
    print("WARNING: Failed to parse PyOCR version: " + str(_raw_version))
    print("WARNING: Exception was: " + str(exc))
    VERSION = (0, 0, 0)
79
80
def get_available_tools():
    """
    List the OCR tool modules from TOOLS that are usable on this system.

    Each entry of TOOLS is asked through its is_available() function; the
    entries that answer true are returned in a new list, in the same order
    as in TOOLS (preference order).
    """
    return [candidate for candidate in TOOLS if candidate.is_available()]
90