import enum
import re
from typing import Any
from typing import Optional
from typing import Tuple

import _onigurumacffi

_ffi = _onigurumacffi.ffi
_lib = _onigurumacffi.lib

# Matches a numeric backreference (``\1``, ``\12``, ...) that is preceded by
# an even number of backslashes, i.e. one that is not itself escaped.
# Group 1 keeps the preceding escaped backslashes, group 2 is the number.
_BACKREF_RE = re.compile(r'((?<!\\)(?:\\\\)*)\\([0-9]+)')


class OnigError(RuntimeError):
    """Raised when an Oniguruma call returns a negative (error) code."""


class OnigSearchOption(enum.IntEnum):
    """Search-time option values, taken from the Oniguruma library."""

    NONE = _lib.ONIG_OPTION_NONE
    NOTBOL = _lib.ONIG_OPTION_NOTBOL
    NOTEOL = _lib.ONIG_OPTION_NOTEOL
    POSIX_REGION = _lib.ONIG_OPTION_POSIX_REGION
    CHECK_VALIDITY_OF_STRING = _lib.ONIG_OPTION_CHECK_VALIDITY_OF_STRING
    NOT_BEGIN_STRING = _lib.ONIG_OPTION_NOT_BEGIN_STRING
    NOT_BEGIN_POSITION = _lib.ONIG_OPTION_NOT_BEGIN_POSITION
    NOT_END_STRING = _lib.ONIG_OPTION_NOT_END_STRING


def _err(code: int, *args: Any) -> str:
    """Return the library's message for ``code``.

    ``args`` are forwarded to ``onig_error_code_to_str`` unchanged (the
    compile path passes its ``OnigErrorInfo`` this way).
    """
    buf = _ffi.new('OnigUChar[ONIG_MAX_ERROR_MESSAGE_LEN]')
    length = _lib.onig_error_code_to_str(buf, code, *args)
    return bytes(buf[0:length]).decode()


def _check(code: int, *args: Any) -> None:
    """Raise :class:`OnigError` if ``code`` is negative; otherwise do nothing."""
    if code < 0:
        raise OnigError(_err(code, *args))


# Module import initializes the library and records its version string.
_check(_lib.onigcffi_initialize())
__onig_version__ = _ffi.string(_lib.onig_version()).decode()


class _Match:
    """Result of a successful match.

    Offsets are stored as byte offsets into the encoded subject and are
    converted to character offsets on demand.
    """

    __slots__ = ('_s_b', '_begs', '_ends')

    def __init__(
            self,
            s_b: bytes,
            begs: Tuple[int, ...],
            ends: Tuple[int, ...],
    ) -> None:
        self._s_b = s_b
        self._begs = begs
        self._ends = ends

    def __repr__(self) -> str:
        return f'<onigurumacffi._Match span={self.span()} match={self[0]!r}>'

    def _char_offset(self, byte_offset: int) -> int:
        """Convert a byte offset into the subject to a character offset.

        Oniguruma reports a negative offset (``ONIG_REGION_NOTPOS``) for a
        group that did not participate in the match.  Slicing with it would
        count from the end of the buffer -- producing a wrong index, or a
        ``UnicodeDecodeError`` when that cuts a multi-byte character -- so
        ``-1`` is returned instead, as ``re.Match.start`` / ``end`` do.
        """
        if byte_offset < 0:
            return -1
        return len(self._s_b[:byte_offset].decode())

    def group(self, n: int = 0) -> str:
        """Return the text of group ``n`` (``0`` is the whole match).

        Raises ``IndexError`` if ``n`` is out of range.
        """
        return self._s_b[self._begs[n]:self._ends[n]].decode()

    __getitem__ = group

    def start(self, n: int = 0) -> int:
        """Return the character index where group ``n`` starts.

        Returns ``-1`` if the group did not participate in the match.
        """
        return self._char_offset(self._begs[n])

    def end(self, n: int = 0) -> int:
        """Return the character index just past the end of group ``n``.

        Returns ``-1`` if the group did not participate in the match.
        """
        return self._char_offset(self._ends[n])

    def span(self, n: int = 0) -> Tuple[int, int]:
        """Return ``(start(n), end(n))``."""
        return self.start(n), self.end(n)

    def expand(self, s: str) -> str:
        """Return ``s`` with each unescaped ``\\N`` replaced by group ``N``."""
        return _BACKREF_RE.sub(lambda m: f'{m[1]}{self[int(m[2])]}', s)

    @property
    def string(self) -> str:
        """The subject string this match was produced from."""
        return self._s_b.decode()


def _start_params(s: str, start: int) -> Tuple[bytes, int]:
    """Encode ``s`` and convert character index ``start`` to a byte offset."""
    return s.encode(), len(s[:start].encode())


def _region() -> Any:
    """Allocate a region that is released when the handle is collected."""
    return _ffi.gc(_lib.onig_region_new(), _lib.onigcffi_region_free)


def _match_ret(ret: int, s_b: bytes, region: Any) -> Optional[_Match]:
    """Turn a library return code plus region into a ``_Match`` or ``None``.

    Returns ``None`` on ``ONIG_MISMATCH`` and raises :class:`OnigError` for
    any other negative code.
    """
    if ret == _lib.ONIG_MISMATCH:
        return None
    _check(ret)

    # Copy the offsets out so the result does not depend on the region.
    begs = tuple(region[0].beg[0:region[0].num_regs])
    ends = tuple(region[0].end[0:region[0].num_regs])

    return _Match(s_b, begs, ends)


class _Pattern:
    """A compiled regular expression."""

    def __init__(self, pattern: str, regex_t: Any) -> None:
        self._pattern = pattern
        # Free the compiled regex when this handle is garbage collected.
        self._regex_t = _ffi.gc(regex_t, _lib.onig_free)

    def __repr__(self) -> str:
        return f'{__name__}.compile({self._pattern!r})'

    def number_of_captures(self) -> int:
        """Return the capture count reported by the library."""
        return _lib.onig_number_of_captures(self._regex_t)

    def match(
            self,
            s: str,
            start: int = 0,
            flags: OnigSearchOption = OnigSearchOption.NONE,
    ) -> Optional[_Match]:
        """Match the pattern at character index ``start`` of ``s``.

        Returns ``None`` if there is no match.
        """
        s_b, start_b = _start_params(s, start)
        region = _region()

        ret = _lib.onigcffi_match(
            self._regex_t, s_b, len(s_b), start_b, region, flags,
        )

        return _match_ret(ret, s_b, region)

    def search(
            self,
            s: str,
            start: int = 0,
            flags: OnigSearchOption = OnigSearchOption.NONE,
    ) -> Optional[_Match]:
        """Search ``s`` for the pattern from character index ``start``.

        Returns ``None`` if there is no match.
        """
        s_b, start_b = _start_params(s, start)
        region = _region()

        ret = _lib.onigcffi_search(
            self._regex_t, s_b, len(s_b), start_b, region, flags,
        )

        return _match_ret(ret, s_b, region)


class _RegSet:
    """A set of compiled patterns that are searched together."""

    def __init__(self, patterns: Tuple[str, ...], regset_t: Any) -> None:
        self._patterns = patterns
        # Free the regset when this handle is garbage collected.
        self._regset_t = _ffi.gc(regset_t, _lib.onig_regset_free)

    def __repr__(self) -> str:
        patterns = ', '.join(repr(pattern) for pattern in self._patterns)
        return f'{__name__}.compile_regset({patterns})'

    def search(
            self,
            s: str,
            start: int = 0,
            flags: OnigSearchOption = OnigSearchOption.NONE,
    ) -> Tuple[int, Optional[_Match]]:
        """Search ``s`` with every pattern from character index ``start``.

        Returns ``(idx, match)`` where ``idx`` is the value returned by the
        library search call; ``match`` is ``None`` when nothing matched.
        """
        s_b, start_b = _start_params(s, start)
        # Out-parameter: the library stores the matching region pointer here.
        region = _ffi.new('OnigRegion*[1]')

        idx = _lib.onigcffi_regset_search(
            self._regset_t, s_b, len(s_b), start_b, region, flags,
        )
        return idx, _match_ret(idx, s_b, region[0])


def _compile_regex_t(pattern: str, dest: Any) -> None:
    """Compile ``pattern`` into the ``regex_t*`` slot ``dest``.

    Raises :class:`OnigError` if compilation fails.
    """
    pattern_b = pattern.encode()

    err_info = _ffi.new('OnigErrorInfo[1]')
    ret = _lib.onigcffi_new(dest, pattern_b, len(pattern_b), err_info)
    _check(ret, err_info)


def compile(pattern: str) -> _Pattern:
    """Compile ``pattern`` and return a :class:`_Pattern`.

    Raises :class:`OnigError` if the pattern is invalid.
    """
    regex = _ffi.new('regex_t*[1]')
    _compile_regex_t(pattern, regex)
    return _Pattern(pattern, regex[0])


def compile_regset(*patterns: str) -> _RegSet:
    """Compile ``patterns`` into a single :class:`_RegSet`.

    Raises :class:`OnigError` if any pattern is invalid.
    """
    regexes = _ffi.new('regex_t*[]', len(patterns))

    compiled = 0
    try:
        for pattern in patterns:
            _compile_regex_t(pattern, regexes + compiled)
            compiled += 1
    except BaseException:
        # No regset exists yet to own the regexes compiled so far, so free
        # them here rather than leaking them.
        for i in range(compiled):
            _lib.onig_free(regexes[i])
        raise

    regset = _ffi.new('OnigRegSet*[1]')
    _check(_lib.onig_regset_new(regset, len(patterns), regexes))
    return _RegSet(patterns, regset[0])