1# © 2020 James R. Barlow: github.com/jbarlow83
2#
3# This Source Code Form is subject to the terms of the Mozilla Public
4# License, v. 2.0. If a copy of the MPL was not distributed with this
5# file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
import logging
import os
import shutil
import sys
from distutils.version import LooseVersion
from itertools import chain
from pathlib import Path
from typing import Any, Callable, Iterable, Iterator, Optional, Set, Tuple, TypeVar
15
16try:
17    import winreg
18except ModuleNotFoundError as e:
19    raise ModuleNotFoundError("This module is for Windows only") from e
20
21log = logging.getLogger(__name__)
22
23T = TypeVar('T')
24
25
def registry_enum(
    key: winreg.HKEYType, enum_fn: Callable[[winreg.HKEYType, int], T]
) -> Iterator[T]:
    """Yield ``enum_fn(key, 0)``, ``enum_fn(key, 1)``, ... until it fails.

    Args:
        key: Handle passed unchanged as the first argument of every call.
        enum_fn: Indexed enumerator; it is expected to raise ``OSError`` once
            the index runs past the last entry.

    Yields:
        Each result of ``enum_fn``, in index order.

    Raises:
        ValueError: If 999 entries were produced without ``enum_fn`` ever
            raising ``OSError``.
    """
    LIMIT = 999
    for index in range(LIMIT):
        try:
            yield enum_fn(key, index)
        except OSError:
            # OSError is the enumerator's "no more entries" signal.
            return
    # Only reached when every index below LIMIT produced an entry.
    raise ValueError(f"Too many registry keys under {key}")
39
40
def registry_subkeys(key: winreg.HKEYType) -> Iterator[str]:
    """Iterate over the subkeys of *key*, as enumerated by ``winreg.EnumKey``."""
    enumerate_subkey = winreg.EnumKey
    return registry_enum(key, enumerate_subkey)
43
44
def registry_values(key: winreg.HKEYType) -> Iterator[Tuple[str, Any, int]]:
    """Iterate over the values of *key*, as enumerated by ``winreg.EnumValue``.

    Each item is the 3-tuple produced by ``winreg.EnumValue``.
    """
    enumerate_value = winreg.EnumValue
    return registry_enum(key, enumerate_value)
47
48
def registry_path_ghostscript(env=None) -> Iterator[Path]:
    r"""Yield the ``bin`` directory of the newest Ghostscript in the registry.

    Opens ``HKEY_LOCAL_MACHINE\SOFTWARE\Artifex\GPL Ghostscript``, selects the
    subkey that compares highest as a ``LooseVersion``, and uses the data of
    that subkey's first enumerated value as the installation directory.

    Args:
        env: Unused; accepted so this function can be called the same way as
            the other entries of ``SHIMS``.

    Yields:
        At most one path, ``<installation directory>/bin``. Nothing is yielded
        when there is no version subkey or the version subkey has no values.
        An ``OSError`` from registry access is logged as a warning rather than
        raised.
    """
    gs_root = r"SOFTWARE\Artifex\GPL Ghostscript"
    try:
        with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, gs_root) as k:
            # default=None: with no version subkeys, a bare max() would raise
            # ValueError, which the OSError handler below does not catch.
            latest_gs = max(registry_subkeys(k), key=LooseVersion, default=None)
        if latest_gs is None:
            log.warning("No Ghostscript version found in registry key %s", gs_root)
            return

        with winreg.OpenKey(
            winreg.HKEY_LOCAL_MACHINE, fr"{gs_root}\{latest_gs}"
        ) as k:
            # A bare next() on an empty iterator raises StopIteration, which
            # turns into RuntimeError inside a generator (PEP 479).
            first_value = next(registry_values(k), None)
        if first_value is None:
            log.warning(
                "Ghostscript registry key %s\\%s has no values", gs_root, latest_gs
            )
            return

        _, gs_path, _ = first_value
        yield Path(gs_path) / 'bin'
    except OSError as e:
        log.warning(e)
62
63
def registry_path_tesseract(env=None) -> Iterator[Path]:
    r"""Yield the Tesseract install directory recorded in the registry.

    Reads the values of ``HKEY_LOCAL_MACHINE\SOFTWARE\Tesseract-OCR`` and
    yields the data of every value named exactly ``InstallDir`` as a ``Path``.

    Args:
        env: Unused; accepted so this function can be called the same way as
            the other entries of ``SHIMS``.

    Yields:
        One path per matching value. An ``OSError`` from registry access is
        logged as a warning rather than raised.
    """
    wanted_name = 'InstallDir'
    try:
        with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, r"SOFTWARE\Tesseract-OCR") as k:
            for value_name, value_data, _value_type in registry_values(k):
                if value_name != wanted_name:
                    continue
                yield Path(value_data)
    except OSError as e:
        log.warning(e)
73
74
def program_files_paths(env=None) -> Iterator[Path]:
    """Find Tesseract and Ghostscript directories under ``PROGRAMFILES``.

    Args:
        env: Mapping to read ``PROGRAMFILES`` from; ``os.environ`` is used
            when it is ``None`` or empty.

    Returns:
        An iterator over each immediate subdirectory whose lower-cased name
        is ``tesseract-ocr``, plus every directory matching ``**/bin`` below
        an immediate subdirectory whose lower-cased name is ``gs``. Results
        are sorted in descending order of ``(name, parent name)``. The
        iterator is empty when ``PROGRAMFILES`` is unset, empty, or does not
        name an existing directory.
    """
    if not env:
        env = os.environ
    program_files = env.get('PROGRAMFILES', '')

    # Path('') is the current directory, so an unset variable must not reach
    # iterdir(); a non-existent directory would make iterdir() raise.
    if not program_files or not Path(program_files).is_dir():
        return iter(())

    def path_walker() -> Iterator[Path]:
        for path in Path(program_files).iterdir():
            if not path.is_dir():
                continue
            if path.name.lower() == 'tesseract-ocr':
                yield path
            elif path.name.lower() == 'gs':
                yield from (p for p in path.glob('**/bin') if p.is_dir())

    return iter(
        sorted(
            path_walker(),
            key=lambda p: (p.name, p.parent.name),
            reverse=True,
        )
    )
96
97
def paths_from_env(env=None) -> Iterator[Path]:
    """Return the entries of ``os.get_exec_path(env)`` as ``Path`` objects.

    Empty entries are skipped.
    """
    search_dirs = os.get_exec_path(env)
    # filter(None, ...) drops falsy (empty) entries before conversion.
    return map(Path, filter(None, search_dirs))
100
101
def shim_path(new_paths: Callable[[Any], Iterator[Path]], env=None) -> str:
    """Render the paths produced by *new_paths* as one search-path string.

    Args:
        new_paths: Called with the environment mapping; returns the paths.
        env: Environment mapping; ``os.environ`` is substituted when it is
            ``None`` or empty.

    Returns:
        The truthy results of ``new_paths``, converted with ``str`` and joined
        with ``os.pathsep``.
    """
    environment = env if env else os.environ
    entries = [str(candidate) for candidate in new_paths(environment) if candidate]
    return os.pathsep.join(entries)
106
107
# Path providers, in priority order. Each one is called with an environment
# mapping and returns an iterator of directories. fix_windows_args() tries
# them one at a time and stops at the first that resolves the executable;
# shim_env_path() concatenates the results of all of them in this order.
SHIMS = [
    paths_from_env,
    registry_path_ghostscript,
    registry_path_tesseract,
    program_files_paths,
]
114
115
def fix_windows_args(program: str, args, env):
    """Adjust our desired program and command line arguments for use on Windows.

    Args:
        program: The program to be run, as a ``str`` or ``os.PathLike``.
        args: Command line as a sequence whose first element names the
            executable. It is copied; the caller's sequence is not modified.
        env: Environment mapping passed to ``shim_path`` for every entry of
            ``SHIMS``.

    Returns:
        A new list of arguments. ``sys.executable`` is prepended when
        *program* ends in ``.py``, and the first element is replaced by the
        full path found through the first entry of ``SHIMS`` that resolves
        it. An empty command line is returned as an empty list.
    """

    if sys.version_info < (3, 8):
        # bpo-33617 - Windows needs manual Path -> str conversion
        args = [os.fspath(arg) for arg in args]
    else:
        # Copy so that tuples are accepted and the caller's list is not
        # modified by the assignment to args[0] below.
        args = list(args)
    # Always convert: the .lower() call below needs a str even when a Path
    # was passed on Python >= 3.8.
    program = os.fspath(program)

    # If we are running a .py on Windows, ensure we call it with this Python
    # (to support test suite shims)
    if program.lower().endswith('.py'):
        args = [sys.executable] + args

    # Nothing to look up for an empty command line; avoids IndexError below.
    if not args:
        return args

    # If the program we want is not on the PATH, check elsewhere
    for shim in SHIMS:
        shimmed_path = shim_path(shim, env)
        new_args0 = shutil.which(args[0], path=shimmed_path)
        if new_args0:
            args[0] = new_args0
            break

    return args
138
139
def unique_everseen(
    iterable: Iterable[T], key: Optional[Callable[[T], Any]] = None
) -> Iterator[T]:
    """List unique elements, preserving order.

    Args:
        iterable: Elements to filter.
        key: Optional function computing the value used to decide whether an
            element was already seen. When omitted or ``None``, the element
            itself is used. The computed values must be hashable.

    Yields:
        The first element seen for each distinct key, in input order.
    """
    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
    # unique_everseen('ABBCcAD', str.lower) --> A B C D
    seen: Set[Any] = set()
    seen_add = seen.add
    for element in iterable:
        k = element if key is None else key(element)
        if k not in seen:
            seen_add(k)
            yield element
151
152
def shim_env_path(env=None):
    """Build one search-path string from every entry of ``SHIMS``.

    Args:
        env: Environment mapping passed to each shim; ``os.environ`` is used
            when it is ``None``.

    Returns:
        The paths from all shims, in ``SHIMS`` order, joined with
        ``os.pathsep``. A path is dropped when its case-folded string form
        has already appeared.
    """
    environment = os.environ if env is None else env

    candidates = chain.from_iterable(shim(environment) for shim in SHIMS)
    deduplicated = unique_everseen(candidates, key=lambda p: str(p).casefold())
    return os.pathsep.join(map(str, deduplicated))
161