1import enum
2import re
3from typing import Any
4from typing import Optional
5from typing import Tuple
6
7import _onigurumacffi
8
# Short module-level aliases for the cffi handle and the compiled library
# namespace exposed by the extension module.
_ffi, _lib = _onigurumacffi.ffi, _onigurumacffi.lib
11
# Matches a numeric backreference such as ``\1`` inside a template string.
# Group 1 captures any run of doubled backslashes directly in front of the
# reference (the lookbehind stops such a run from starting mid-way through a
# longer run of backslashes); group 2 captures the digits.  A reference whose
# own backslash is escaped therefore does not match.
_BACKREF_RE = re.compile(
    r'((?<!\\)(?:\\\\)*)'  # preceding pairs of backslashes, kept verbatim
    r'\\([0-9]+)',  # a single backslash followed by the group number
)
13
14
class OnigError(RuntimeError):
    """Raised when a library call reports a negative (error) return code."""
17
18
class OnigSearchOption(enum.IntEnum):
    """Option values accepted by the ``flags`` argument of match/search.

    Each member takes its value from the ``ONIG_OPTION_*`` constant of the
    same name exposed by the compiled library; see the Oniguruma API
    documentation for the meaning of the individual options.
    """

    # Default used by every match/search method in this module.
    NONE = _lib.ONIG_OPTION_NONE
    NOTBOL = _lib.ONIG_OPTION_NOTBOL
    NOTEOL = _lib.ONIG_OPTION_NOTEOL
    POSIX_REGION = _lib.ONIG_OPTION_POSIX_REGION
    CHECK_VALIDITY_OF_STRING = _lib.ONIG_OPTION_CHECK_VALIDITY_OF_STRING
    NOT_BEGIN_STRING = _lib.ONIG_OPTION_NOT_BEGIN_STRING
    NOT_BEGIN_POSITION = _lib.ONIG_OPTION_NOT_BEGIN_POSITION
    NOT_END_STRING = _lib.ONIG_OPTION_NOT_END_STRING
28
29
def _err(code: int, *args: Any) -> str:
    """Return the library's message text for the error ``code``.

    Any extra ``args`` are forwarded unchanged to
    ``onig_error_code_to_str`` after the code itself.
    """
    message = _ffi.new('OnigUChar[ONIG_MAX_ERROR_MESSAGE_LEN]')
    # The call fills ``message`` and returns how many bytes it wrote.
    n_written = _lib.onig_error_code_to_str(message, code, *args)
    raw = bytes(message[0:n_written])
    return raw.decode()
34
35
def _check(code: int, *args: Any) -> None:
    """Raise ``OnigError`` if ``code`` is negative; otherwise do nothing.

    ``args`` are passed through to ``_err`` when building the message.
    """
    if code >= 0:
        return
    raise OnigError(_err(code, *args))
39
40
# Initialise the library once at import time; a negative return code is
# turned into an OnigError by _check.
_check(_lib.onigcffi_initialize())
# Version string reported by the library, decoded to ``str``.
__onig_version__ = _ffi.string(_lib.onig_version()).decode()
43
44
class _Match:
    """A successful match over a UTF-8 encoded subject.

    ``begs`` and ``ends`` hold one byte offset per group into ``s_b``.  The
    public accessors translate those into ``str`` slices and character
    offsets.  A negative offset marks a group that did not take part in the
    match (Oniguruma's ``ONIG_REGION_NOTPOS``).
    """

    __slots__ = ('_s_b', '_begs', '_ends')

    def __init__(
        self,
        s_b: bytes,
        begs: Tuple[int, ...],
        ends: Tuple[int, ...],
    ) -> None:
        self._s_b = s_b
        self._begs = begs
        self._ends = ends

    def __repr__(self) -> str:
        return f'<onigurumacffi._Match span={self.span()} match={self[0]!r}>'

    def _char_offset(self, byte_offset: int) -> int:
        """Convert a byte offset into the subject to a character offset.

        Negative offsets (group did not participate) map to ``-1`` instead
        of being used as a slice bound, which would count from the end of
        the subject and could cut a multi-byte character in half.
        """
        if byte_offset < 0:
            return -1
        return len(self._s_b[:byte_offset].decode())

    def group(self, n: int = 0) -> str:
        """Return the text of group ``n``.

        A group that did not participate yields ``''``.  Raises
        ``IndexError`` when ``n`` is out of range.
        """
        beg, end = self._begs[n], self._ends[n]
        if beg < 0 or end < 0:
            return ''
        return self._s_b[beg:end].decode()

    __getitem__ = group

    def start(self, n: int = 0) -> int:
        """Return the character index where group ``n`` starts, or ``-1``
        if the group did not participate in the match.
        """
        return self._char_offset(self._begs[n])

    def end(self, n: int = 0) -> int:
        """Return the character index just past group ``n``, or ``-1`` if
        the group did not participate in the match.
        """
        return self._char_offset(self._ends[n])

    def span(self, n: int = 0) -> Tuple[int, int]:
        """Return ``(start(n), end(n))``."""
        return self.start(n), self.end(n)

    def expand(self, s: str) -> str:
        """Substitute ``\\N`` backreferences in ``s`` with group ``N``.

        Doubled backslashes directly before a reference are kept as-is.
        """
        return _BACKREF_RE.sub(lambda m: f'{m[1]}{self[int(m[2])]}', s)

    @property
    def string(self) -> str:
        """The full subject string this match was produced from."""
        return self._s_b.decode()
81
82
def _start_params(s: str, start: int) -> Tuple[bytes, int]:
    """Encode ``s`` and translate ``start`` into a byte offset.

    Returns the encoded subject together with the encoded length of
    ``s[:start]``.
    """
    encoded = s.encode()
    prefix_len = len(s[:start].encode())
    return encoded, prefix_len
85
86
def _region() -> Any:
    """Allocate a region and register ``onigcffi_region_free`` as its
    destructor via ``ffi.gc``.
    """
    raw = _lib.onig_region_new()
    return _ffi.gc(raw, _lib.onigcffi_region_free)
89
90
def _match_ret(ret: int, s_b: bytes, region: Any) -> Optional[_Match]:
    """Turn a match/search return code plus region into a ``_Match``.

    Returns ``None`` when ``ret`` is ``ONIG_MISMATCH``; any other negative
    code raises ``OnigError`` through ``_check``.
    """
    if ret == _lib.ONIG_MISMATCH:
        return None
    _check(ret)

    reg = region[0]
    n_groups = reg.num_regs
    # Copy the offsets out of the C arrays into plain tuples.
    starts = tuple(reg.beg[0:n_groups])
    stops = tuple(reg.end[0:n_groups])
    return _Match(s_b, starts, stops)
101
102
class _Pattern:
    """A compiled regular expression wrapping a library ``regex_t``."""

    def __init__(self, pattern: str, regex_t: Any) -> None:
        self._pattern = pattern
        # Register onig_free as the destructor for the compiled regex.
        self._regex_t = _ffi.gc(regex_t, _lib.onig_free)

    def __repr__(self) -> str:
        return f'{__name__}.compile({self._pattern!r})'

    def number_of_captures(self) -> int:
        """Return the value of ``onig_number_of_captures`` for this regex."""
        return _lib.onig_number_of_captures(self._regex_t)

    def _run(
            self,
            func: Any,
            s: str,
            start: int,
            flags: OnigSearchOption,
    ) -> Optional[_Match]:
        """Shared body of ``match`` and ``search``.

        Encodes the subject, calls ``func`` with a fresh region and converts
        the outcome with ``_match_ret``.
        """
        subject, offset = _start_params(s, start)
        reg = _region()
        code = func(self._regex_t, subject, len(subject), offset, reg, flags)
        return _match_ret(code, subject, reg)

    def match(
            self,
            s: str,
            start: int = 0,
            flags: OnigSearchOption = OnigSearchOption.NONE,
    ) -> Optional[_Match]:
        """Run ``onigcffi_match`` on ``s`` from character index ``start``.

        Returns a ``_Match`` or ``None`` on mismatch; raises ``OnigError``
        on any other negative return code.
        """
        return self._run(_lib.onigcffi_match, s, start, flags)

    def search(
            self,
            s: str,
            start: int = 0,
            flags: OnigSearchOption = OnigSearchOption.NONE,
    ) -> Optional[_Match]:
        """Run ``onigcffi_search`` on ``s`` from character index ``start``.

        Returns a ``_Match`` or ``None`` on mismatch; raises ``OnigError``
        on any other negative return code.
        """
        return self._run(_lib.onigcffi_search, s, start, flags)
143
144
class _RegSet:
    """A set of compiled patterns searched together in one call."""

    def __init__(self, patterns: Tuple[str, ...], regset_t: Any) -> None:
        self._patterns = patterns
        # Register onig_regset_free as the destructor for the regset.
        self._regset_t = _ffi.gc(regset_t, _lib.onig_regset_free)

    def __repr__(self) -> str:
        patterns = ', '.join(map(repr, self._patterns))
        return f'{__name__}.compile_regset({patterns})'

    def search(
            self,
            s: str,
            start: int = 0,
            flags: OnigSearchOption = OnigSearchOption.NONE,
    ) -> Tuple[int, Optional[_Match]]:
        """Search ``s`` from character index ``start`` with the whole set.

        Returns ``(idx, match)`` where ``idx`` is the value returned by
        ``onigcffi_regset_search``; ``match`` is ``None`` when that value is
        ``ONIG_MISMATCH``.  Other negative values raise ``OnigError``.
        """
        subject, offset = _start_params(s, start)
        # Out-parameter: the library stores a region pointer in slot 0.
        region_out = _ffi.new('OnigRegion*[1]')

        idx = _lib.onigcffi_regset_search(
            self._regset_t, subject, len(subject), offset, region_out, flags,
        )
        found = _match_ret(idx, subject, region_out[0])
        return idx, found
167
168
def _compile_regex_t(pattern: str, dest: Any) -> None:
    """Compile ``pattern`` with ``onigcffi_new``, storing the result in
    ``dest``.

    Raises ``OnigError`` when the library returns a negative code; the
    error-info structure is handed to ``_check`` for the message.
    """
    encoded = pattern.encode()
    einfo = _ffi.new('OnigErrorInfo[1]')
    code = _lib.onigcffi_new(dest, encoded, len(encoded), einfo)
    _check(code, einfo)
175
176
def compile(pattern: str) -> _Pattern:
    """Compile ``pattern`` and return it wrapped in a ``_Pattern``.

    Raises ``OnigError`` if compilation fails.
    """
    # Single-slot out-parameter that receives the compiled regex pointer.
    out = _ffi.new('regex_t*[1]')
    _compile_regex_t(pattern, out)
    compiled = out[0]
    return _Pattern(pattern, compiled)
181
182
def compile_regset(*patterns: str) -> _RegSet:
    """Compile every pattern and bundle them into a ``_RegSet``.

    Raises ``OnigError`` if any pattern fails to compile or the regset
    cannot be created.  When a pattern fails, the regexes compiled before it
    are freed so that they do not leak.
    """
    regexes = _ffi.new('regex_t*[]', len(patterns))
    compiled = 0
    try:
        for pattern in patterns:
            _compile_regex_t(pattern, regexes + compiled)
            compiled += 1
    except BaseException:
        # Nothing owns the already-compiled regexes yet (no regset, no
        # ffi.gc wrapper), so release them here before propagating.
        for i in range(compiled):
            _lib.onig_free(regexes[i])
        raise

    regset = _ffi.new('OnigRegSet*[1]')
    # NOTE(review): if onig_regset_new fails, the compiled regexes are
    # presumably still owned by this function and would leak; confirm the
    # library's ownership rules on failure before freeing them here.
    _check(_lib.onig_regset_new(regset, len(patterns), regexes))
    return _RegSet(patterns, regset[0])
191