""" python-pcre

Copyright (c) 2012-2015, Arkadiusz Wahlig
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the <organization> nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""

import codecs

import _pcre

__version__ = '0.7'

def _unescape(esc):
    # Returns the text denoted by a backslash escape, given its body (the part
    # after the backslash), e.g. 'n' -> newline, '101' -> 'A'.
    sequence = '\\' + esc
    try:
        # Python 2: str and unicode both provide decode().
        return sequence.decode('string-escape')
    except AttributeError:
        # Python 3: str has no decode(); go through the codecs module instead.
        return codecs.decode(sequence, 'unicode_escape')

class Pattern(_pcre.Pattern):
    # Compiled pattern: _pcre.Pattern extended with re-style helper methods.

    def search(self, string, pos=-1, endpos=-1, flags=0):
        # Returns a Match built from this pattern and string, or None when
        # building it raises NoMatch.  pos, endpos and flags are handed to
        # Match unchanged.
        try:
            return Match(self, string, pos, endpos, flags)
        except NoMatch:
            return None

    def match(self, string, pos=-1, endpos=-1, flags=0):
        # Same as search(), but with the ANCHORED flag added to flags.
        try:
            return Match(self, string, pos, endpos, flags | ANCHORED)
        except NoMatch:
            return None

    def split(self, string, maxsplit=0, flags=0):
        # Splits string around the non-empty matches of this pattern and
        # returns the pieces as a list; the groups of every separator match
        # are inserted between the pieces.  A positive maxsplit limits the
        # number of splits; the remainder of string is the last element.
        output = []
        pos = n = 0
        for match in self.finditer(string, flags=flags):
            start, end = match.span()
            # Zero-length matches never split the string and are not counted.
            if start != end:
                output.append(string[pos:start])
                output.extend(match.groups())
                pos = end
                n += 1
                if 0 < maxsplit <= n:
                    break
        output.append(string[pos:])
        return output

    def findall(self, string, pos=-1, endpos=-1, flags=0):
        # Returns a list with one item per match produced by finditer():
        # the whole match if the pattern has no groups, the only group if it
        # has exactly one, otherwise the tuple of all groups.
        # NOTE(review): groups('') presumably substitutes '' for groups that
        # did not participate in the match -- confirm against _pcre.Match.
        matches = self.finditer(string, pos, endpos, flags)
        if self.groups == 0:
            return [m.group() for m in matches]
        if self.groups == 1:
            return [m.groups('')[0] for m in matches]
        return [m.groups('') for m in matches]

    def finditer(self, string, pos=-1, endpos=-1, flags=0):
        # Generator yielding successive Match objects for string until
        # building the next one raises NoMatch.
        try:
            while True:
                match = Match(self, string, pos, endpos, flags)
                yield match
                # Continue from the end of this match; step one position
                # forward after an empty match so the loop always advances.
                start, pos = match.span()
                if pos == start:
                    pos += 1
        except NoMatch:
            pass

    def sub(self, repl, string, count=0, flags=0):
        # Like subn(), but returns only the resulting string.
        return self.subn(repl, string, count, flags)[0]

    def subn(self, repl, string, count=0, flags=0):
        # Replaces matches of this pattern in string and returns the tuple
        # (new_string, number_of_replacements).  repl is either a callable
        # taking the match and returning the replacement, or a template that
        # is expanded with match.expand().  A positive count limits the number
        # of replacements.
        if not hasattr(repl, '__call__'):
            repl = lambda match, tmpl=repl: match.expand(tmpl)
        output = []
        pos = n = 0
        for match in self.finditer(string, flags=flags):
            start, end = match.span()
            # Skip an empty match that sits exactly where the previous
            # replacement ended, except at the very beginning of the string.
            if not pos == start == end or pos == 0:
                output.extend((string[pos:start], repl(match)))
                pos = end
                n += 1
                if 0 < count <= n:
                    break
        output.append(string[pos:])
        # Joining with string[:0] yields a result of the same type as string.
        return (string[:0].join(output), n)

    def __reduce__(self):
        # Pickle support.  A pattern without source text (pattern is None, as
        # for objects created by loads()) is rebuilt from its dumps() data;
        # any other pattern is rebuilt from its source text and flags.
        if self.pattern is None:
            return (Pattern, (None, 0, self.dumps()))
        return (Pattern, (self.pattern, self.flags))

    def __repr__(self):
        # Returns an expression-like description of the pattern: a loads(...)
        # call for patterns without source text, otherwise a compile(...)
        # call with the flags spelled out by name.
        if self.pattern is None:
            return '{0}.loads({1})'.format(__name__, repr(self.dumps()))
        flags = self.flags
        if flags:
            v = []
            for name in _FLAGS:
                value = getattr(_pcre, name)
                if flags & value:
                    v.append('{0}.{1}'.format(__name__, name))
                    flags &= ~value
            # Bits that do not correspond to any named flag are shown in hex.
            if flags:
                v.append(hex(flags))
            return '{0}.compile({1}, {2})'.format(__name__, repr(self.pattern), '|'.join(v))
        return '{0}.compile({1})'.format(__name__, repr(self.pattern))

class Match(_pcre.Match):
    # Match object whose expand() takes str.format() style templates.

    def expand(self, template):
        # Formats template with str.format(): {0} is the whole match, {1}..
        # are the groups and {name} are the named groups.
        return template.format(self.group(), *self.groups(''), **self.groupdict(''))

    def __repr__(self):
        # Shows the class, the span and the matched text.
        cls = self.__class__
        return '<{0}.{1} object; span={2}, match={3}>'.format(cls.__module__,
            cls.__name__, repr(self.span()), repr(self.group()))

class REMatch(Match):
    # Match object whose expand() takes re-style templates (r"\1\g<id>").

    def expand(self, template):
        # Expands escapes and group references found in template.
        # Raises PCREError for a malformed \g reference, an out-of-range
        # group number or a group that did not match, and IndexError for an
        # unknown group name.
        groups = (self.group(),) + self.groups()
        groupdict = self.groupdict()
        def repl(match):
            esc, index, group, badgroup = match.groups()
            if esc:
                return _unescape(esc)
            if badgroup:
                raise PCREError(100, 'invalid group name')
            try:
                # \N and \g<N> refer to groups by number, \g<name> by name.
                if index or group.isdigit():
                    result = groups[int(index or group)]
                else:
                    result = groupdict[group]
            except IndexError:
                raise PCREError(15, 'invalid group reference')
            except KeyError:
                raise IndexError('unknown group name')
            if result is None:
                raise PCREError(101, 'unmatched group')
            return result
        return _REGEX_RE_TEMPLATE.sub(repl, template)

def compile(pattern, flags=0):
    # Returns a Pattern for pattern.  An already compiled pattern is returned
    # as is; combining it with non-zero flags raises ValueError.
    if isinstance(pattern, _pcre.Pattern):
        if flags != 0:
            raise ValueError('cannot process flags argument with a compiled pattern')
        return pattern
    return Pattern(pattern, flags)

def match(pattern, string, flags=0):
    # Shortcut for compile(pattern, flags).match(string).
    return compile(pattern, flags).match(string)

def search(pattern, string, flags=0):
    # Shortcut for compile(pattern, flags).search(string).
    return compile(pattern, flags).search(string)

def split(pattern, string, maxsplit=0, flags=0):
    # Shortcut for compile(pattern, flags).split(string, maxsplit).
    return compile(pattern, flags).split(string, maxsplit)

def findall(pattern, string, flags=0):
    # Shortcut for compile(pattern, flags).findall(string).
    return compile(pattern, flags).findall(string)

def finditer(pattern, string, flags=0):
    # Shortcut for compile(pattern, flags).finditer(string).
    return compile(pattern, flags).finditer(string)

def sub(pattern, repl, string, count=0, flags=0):
    # Shortcut for compile(pattern, flags).sub(repl, string, count).
    return compile(pattern, flags).sub(repl, string, count)

def subn(pattern, repl, string, count=0, flags=0):
    # Shortcut for compile(pattern, flags).subn(repl, string, count).
    return compile(pattern, flags).subn(repl, string, count)

def loads(data):
    # Loads a pattern serialized with Pattern.dumps().
    return Pattern(None, loads=data)

def escape(pattern):
    # Escapes a regular expression: every character outside _ALNUM gets a
    # backslash prefix, and the NUL character becomes "\000".
    s = list(pattern)
    alnum = _ALNUM
    for i, c in enumerate(pattern):
        if c not in alnum:
            s[i] = '\\000' if c == '\000' else ('\\' + c)
    # Joining with pattern[:0] yields a result of the same type as pattern.
    return pattern[:0].join(s)

def escape_template(template):
    # Escapes "{" and "}" characters in the template.
    return template.replace('{', '{{').replace('}', '}}')

def convert_re_template(template):
    # Converts re template r"\1\g<id>" to "{1}{id}" format.
    # Raises PCREError for a malformed \g reference.
    def repl(match):
        esc, index, group, badgroup = match.groups()
        if esc:
            return _unescape(esc)
        if badgroup:
            raise PCREError(100, 'invalid group name')
        return '{%s}' % (index or group)
    # Literal braces are doubled first so that str.format() leaves them alone.
    return _REGEX_RE_TEMPLATE.sub(repl, escape_template(template))

def enable_re_template_mode():
    # Makes calls to sub() take re templates instead of str.format() templates.
    # Done by rebinding the module-level name Match, which the Pattern methods
    # look up each time they build a match object.
    global Match
    Match = REMatch

# Characters that escape() leaves unescaped.
_ALNUM = frozenset('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890')
error = PCREError = _pcre.PCREError
NoMatch = _pcre.NoMatch
MAXREPEAT = 65536

# Provides PCRE build-time configuration.
config = type('config', (), _pcre.get_config())

# Names of the pattern and/or match flags exported by this module.
_FLAGS = (
    'IGNORECASE', 'MULTILINE', 'DOTALL', 'UNICODE', 'VERBOSE',
    'ANCHORED', 'NOTBOL', 'NOTEOL', 'NOTEMPTY', 'NOTEMPTY_ATSTART',
    'UTF8', 'NO_UTF8_CHECK',
)

# Copy flags from _pcre module.  The generator expression keeps its loop
# variable out of the module namespace, so nothing has to be deleted after.
globals().update((flag, getattr(_pcre, flag)) for flag in _FLAGS)

# Short versions
I = IGNORECASE
M = MULTILINE
S = DOTALL
U = UNICODE
X = VERBOSE

# Study flags
STUDY_JIT = _pcre.STUDY_JIT

# Used to parse re templates.  The four groups are: a backslash escape body,
# a numeric group reference, the id inside \g<...>, and a malformed \g
# reference.
_REGEX_RE_TEMPLATE = compile(
    r'\\(?:([\\abfnrtv]|0[0-7]{0,2}|[0-7]{3})|'
    r'(\d{1,2})|g<(\d+|[^\d\W]\w*)>|(g[^>]*))'
)