"""
Wrapper for various OCR tools.

USAGE:
from PIL import Image
import sys
from pyocr import pyocr
from pyocr.builders import TextBuilder

tools = pyocr.get_available_tools()[:]
if len(tools) == 0:
    print("No OCR tool found")
    sys.exit(1)
print("Using '%s'" % (tools[0].get_name()))
tools[0].image_to_string(Image.open('test.png'), lang='fra',
                         builder=TextBuilder())


DETAILS:
Each module wrapping an OCR tool provides the following functions:
- get_name(): Return the name of the tool
- is_available(): Returns True if the tool is installed. False otherwise.
- get_version(): Return a tuple containing the version of the tool (if
  installed)
- get_available_builders(): Returns a list of builders that can be used with
  this tool (see image_to_string())
- get_available_languages(): Returns a list of languages supported by this
  tool. Languages are usually written using ISO 3-letter language codes
- image_to_string():
    Takes 3 arguments:
    - an image (see python Imaging "Image" module) (mandatory)
    - lang=<language> (see get_available_languages()) (optional)
    - builder=<builder> (see get_available_builders() or the classes in the
      module 'pyocr.builders') (optional: default is
      pyocr.builders.TextBuilder)
    The returned value depends on the specified builder.


COPYRIGHT:
Pyocr is released under the GPL v3.
Copyright (c) Jerome Flesch, 2011-2016
Tesseract module: Copyright (c) Samuel Hoffstaetter, 2009

WEBSITE:
https://gitlab.gnome.org/World/OpenPaperwork/pyocr#readme
"""

from . import _version
from . import cuneiform
from . import libtesseract
from . import tesseract

__all__ = [
    'get_available_tools',
    'TOOLS',
    'VERSION',
]


TOOLS = [  # in preference order
    tesseract,
    libtesseract,
    cuneiform,
]

# Read the raw version once, tolerating a missing attribute, so that the
# warning below can always be printed: re-reading `_version.version` inside
# the handler would raise again when the attribute itself is the problem.
_raw_version = getattr(_version, "version", None)

try:
    # drop Git commit
    VERSION = _raw_version.split("-", 1)[0]
    # split major, minor, update
    VERSION = VERSION.split(".")
    # ensure always at least 3 elements
    # NOTE: parsed components are kept as strings while the padding uses the
    # integer 0; this mixed typing is preserved for backward compatibility.
    VERSION = VERSION + ([0] * (3 - len(VERSION)))
    # seal it
    VERSION = tuple(VERSION)
except Exception as exc:
    # Best effort: never let a malformed version string break the import.
    print("WARNING: Failed to parse PyOCR version: " + str(_raw_version))
    print("WARNING: Exception was: " + str(exc))
    VERSION = (0, 0, 0)


def get_available_tools():
    """
    Return a list of OCR tools available on the local system.

    Every module listed in TOOLS is asked whether it is usable through its
    is_available() function. The modules that answer True are returned in
    the same order as they appear in TOOLS (preference order). The list is
    empty when no tool is available.
    """
    return [tool for tool in TOOLS if tool.is_available()]