# © 2020 James R. Barlow: github.com/jbarlow83
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""Windows-only helpers for locating external programs.

The "shims" in this module produce candidate directories (from PATH, the
registry and Program Files) in which Ghostscript and Tesseract may be found.
"""

import logging
import os
import re
import shutil
import sys
from itertools import chain
from pathlib import Path
from typing import Any, Callable, Iterable, Iterator, Set, Tuple, TypeVar

try:
    import winreg
except ModuleNotFoundError as e:
    raise ModuleNotFoundError("This module is for Windows only") from e

try:
    # distutils is deprecated (PEP 632) and removed in Python 3.12. Nothing in
    # this module needs it any more; the name is kept importable only for
    # backward compatibility where distutils still exists.
    from distutils.version import LooseVersion
except ImportError:
    LooseVersion = None  # type: ignore

log = logging.getLogger(__name__)

T = TypeVar('T')
K = TypeVar('K')


def _version_key(version: str) -> Tuple[int, ...]:
    """Return a sort key built from the integer components found in *version*.

    ``"gs10.00.0"`` becomes ``(10, 0, 0)``, so versions compare numerically
    rather than lexicographically. A string without digits gives ``()``.
    """
    return tuple(int(part) for part in re.findall(r'\d+', version))


def registry_enum(
    key: winreg.HKEYType, enum_fn: Callable[[winreg.HKEYType, int], T]
) -> Iterator[T]:
    """Yield ``enum_fn(key, n)`` for n = 0, 1, 2, ... until it raises OSError.

    Args:
        key: an open registry key.
        enum_fn: an indexed enumerator such as ``winreg.EnumKey`` or
            ``winreg.EnumValue``.

    Raises:
        ValueError: if 999 entries were produced without reaching the end.
    """
    LIMIT = 999
    for n in range(LIMIT):
        # Only the enumeration call is guarded; OSError from it is treated as
        # "no more entries", which is how the winreg Enum* functions report
        # the end of the sequence.
        try:
            item = enum_fn(key, n)
        except OSError:
            return
        yield item
    raise ValueError(f"Too many registry keys under {key}")


def registry_subkeys(key: winreg.HKEYType) -> Iterator[str]:
    """Iterate over the names of the subkeys of *key*."""
    return registry_enum(key, winreg.EnumKey)


def registry_values(key: winreg.HKEYType) -> Iterator[Tuple[str, Any, int]]:
    """Iterate over the ``(name, data, type)`` values stored under *key*."""
    return registry_enum(key, winreg.EnumValue)


def registry_path_ghostscript(env=None) -> Iterator[Path]:
    """Yield the ``bin`` directory of the newest Ghostscript in the registry.

    Looks under ``HKLM\\SOFTWARE\\Artifex\\GPL Ghostscript``, picks the subkey
    with the highest version number and uses the data of its first value as
    the base directory (presumably the install location; verify against the
    Ghostscript installer). Yields nothing if the key is missing or empty;
    registry errors are logged as warnings.

    Args:
        env: unused; accepted so all shims share one signature.
    """
    base = r"SOFTWARE\Artifex\GPL Ghostscript"
    try:
        with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, base) as k:
            # default=None: an empty key must not raise ValueError from max()
            latest_gs = max(registry_subkeys(k), key=_version_key, default=None)
        if latest_gs is None:
            return
        with winreg.OpenKey(
            winreg.HKEY_LOCAL_MACHINE, base + "\\" + latest_gs
        ) as k:
            # A bare next() would raise StopIteration on a key with no values,
            # which becomes RuntimeError inside a generator (PEP 479).
            first_value = next(registry_values(k), None)
    except OSError as e:
        log.warning(e)
        return
    if first_value is None:
        return
    _, gs_path, _ = first_value
    yield Path(gs_path) / 'bin'


def registry_path_tesseract(env=None) -> Iterator[Path]:
    """Yield the Tesseract install directory recorded in the registry.

    Reads the ``InstallDir`` value of ``HKLM\\SOFTWARE\\Tesseract-OCR``.
    Registry errors are logged as warnings and nothing is yielded.

    Args:
        env: unused; accepted so all shims share one signature.
    """
    try:
        with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, r"SOFTWARE\Tesseract-OCR") as k:
            for name, val, _valtype in registry_values(k):
                if name == 'InstallDir':
                    yield Path(val)
    except OSError as e:
        log.warning(e)


def program_files_paths(env=None) -> Iterator[Path]:
    """Find Tesseract and Ghostscript directories under ``%PROGRAMFILES%``.

    Returns the ``tesseract-ocr`` directory and every ``bin`` directory below
    ``gs`` (names matched case-insensitively), sorted in descending order so
    that the newest Ghostscript version comes before older ones.

    Args:
        env: environment mapping; ``os.environ`` is used when it is falsy.
    """
    if not env:
        env = os.environ
    program_files = env.get('PROGRAMFILES', '')
    if not program_files:
        # Path('') is the current directory; never search it by accident.
        return iter(())

    def path_walker() -> Iterator[Path]:
        try:
            for path in Path(program_files).iterdir():
                if not path.is_dir():
                    continue
                folder = path.name.lower()
                if folder == 'tesseract-ocr':
                    yield path
                elif folder == 'gs':
                    yield from (p for p in path.glob('**/bin') if p.is_dir())
        except OSError as e:
            # Missing or unreadable directory: report it like the registry
            # shims do instead of aborting the whole search.
            log.warning(e)

    def sort_key(p: Path) -> Tuple[str, Tuple[int, ...], str]:
        # Compare the parent folder (e.g. "gs10.00.0") numerically first, so
        # that 10.x ranks above 9.x; the raw name is only a tie-breaker.
        return (p.name, _version_key(p.parent.name), p.parent.name)

    return iter(sorted(path_walker(), key=sort_key, reverse=True))


def paths_from_env(env=None) -> Iterator[Path]:
    """Yield the non-empty entries of the executable search path of *env*."""
    return (Path(p) for p in os.get_exec_path(env) if p)


def shim_path(new_paths: Callable[[Any], Iterator[Path]], env=None) -> str:
    """Join the directories produced by one shim into a PATH-style string.

    Args:
        new_paths: a shim, called with the environment mapping.
        env: environment mapping; ``os.environ`` is used when it is falsy.
    """
    if not env:
        env = os.environ
    return os.pathsep.join(str(p) for p in new_paths(env) if p)


# Shims are consulted in this order, so directories already on PATH win over
# locations discovered through the registry or Program Files.
SHIMS = [
    paths_from_env,
    registry_path_ghostscript,
    registry_path_tesseract,
    program_files_paths,
]


def fix_windows_args(program: str, args, env):
    """Adjust our desired program and command line arguments for use on Windows

    Args:
        program: the program that is going to be run.
        args: the full argument sequence, with the program as ``args[0]``.
        env: environment mapping handed to the shims.

    Returns:
        A new list of arguments. The caller's sequence is never modified.
    """
    # bpo-33617 - Windows needs manual Path -> str conversion before Python
    # 3.8. The conversion is done on every version: it also gives us a private
    # list (tuples are accepted, the caller's list is not mutated) and makes
    # args[0] a plain str for shutil.which().
    args = [os.fspath(arg) for arg in args]
    program = os.fspath(program)

    # If we are running a .py on Windows, ensure we call it with this Python
    # (to support test suite shims)
    if program.lower().endswith('.py'):
        args = [sys.executable] + args

    if not args:
        # Nothing to resolve
        return args

    # If the program we want is not on the PATH, check elsewhere
    for shim in SHIMS:
        resolved = shutil.which(args[0], path=shim_path(shim, env))
        if resolved:
            args[0] = resolved
            break

    return args


def unique_everseen(iterable: Iterable[T], key: Callable[[T], K]) -> Iterator[T]:
    "List unique elements, preserving order."
    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
    # unique_everseen('ABBCcAD', str.lower) --> A B C D
    seen: Set[K] = set()
    seen_add = seen.add
    for element in iterable:
        k = key(element)
        if k not in seen:
            seen_add(k)
            yield element


def shim_env_path(env=None):
    """Build a PATH-style string from the directories of every shim.

    Entries keep the order in which the shims produced them; duplicates are
    dropped using a case-insensitive comparison.

    Args:
        env: environment mapping; ``os.environ`` is used when it is None.
    """
    if env is None:
        env = os.environ

    shim_paths = chain.from_iterable(shim(env) for shim in SHIMS)
    return os.pathsep.join(
        str(p) for p in unique_everseen(shim_paths, key=lambda p: str.casefold(str(p)))
    )