1""" python-pcre
2
3Copyright (c) 2012-2015, Arkadiusz Wahlig
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8    * Redistributions of source code must retain the above copyright
9      notice, this list of conditions and the following disclaimer.
10    * Redistributions in binary form must reproduce the above copyright
11      notice, this list of conditions and the following disclaimer in the
12      documentation and/or other materials provided with the distribution.
13    * Neither the name of the <organization> nor the
14      names of its contributors may be used to endorse or promote products
15      derived from this software without specific prior written permission.
16
17THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
21DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27"""
28
29import _pcre
30
# Version string of this module.
__version__ = '0.7'
32
class Pattern(_pcre.Pattern):
    """Compiled pattern exposing an re-like API on top of _pcre.Pattern.

    Matching is done by instantiating the module-level Match class, which
    reports "no match" by raising NoMatch.  Match is looked up at call time,
    so rebinding that global changes the type of the objects returned here.
    """

    def search(self, string, pos=-1, endpos=-1, flags=0):
        """Return a match object for string, or None when nothing matches.

        pos, endpos and flags are handed to Match unchanged.
        """
        try:
            found = Match(self, string, pos, endpos, flags)
        except NoMatch:
            return None
        return found

    def match(self, string, pos=-1, endpos=-1, flags=0):
        """Like search(), but with the ANCHORED flag added to flags."""
        try:
            found = Match(self, string, pos, endpos, flags | ANCHORED)
        except NoMatch:
            return None
        return found

    def split(self, string, maxsplit=0, flags=0):
        """Split string at the non-empty matches of the pattern.

        After each piece the values of match.groups() for the separating
        match are added to the result.  A positive maxsplit limits the number
        of splits; 0 means no limit.  Returns a list.
        """
        pieces = []
        cursor = 0
        splits = 0
        for found in self.finditer(string, flags=flags):
            begin, stop = found.span()
            if begin == stop:
                # Empty matches never split the string.
                continue
            pieces.append(string[cursor:begin])
            pieces.extend(found.groups())
            cursor = stop
            splits += 1
            if maxsplit > 0 and splits >= maxsplit:
                break
        # Whatever follows the last split point is the final piece.
        pieces.append(string[cursor:])
        return pieces

    def findall(self, string, pos=-1, endpos=-1, flags=0):
        """Return a list with one entry per match found by finditer().

        The entry is the whole match when the pattern has no groups, the
        single group when it has exactly one, and the tuple of all groups
        otherwise.  Group values are fetched with groups('').
        """
        found = self.finditer(string, pos, endpos, flags)
        group_count = self.groups
        if group_count == 0:
            pick = lambda m: m.group()
        elif group_count == 1:
            pick = lambda m: m.groups('')[0]
        else:
            pick = lambda m: m.groups('')
        return [pick(m) for m in found]

    def finditer(self, string, pos=-1, endpos=-1, flags=0):
        """Yield successive match objects until Match raises NoMatch.

        Each new search starts where the previous match ended; after an empty
        match it starts one position further so the loop keeps advancing.
        """
        try:
            while True:
                found = Match(self, string, pos, endpos, flags)
                yield found
                begin, stop = found.span()
                pos = stop + 1 if begin == stop else stop
        except NoMatch:
            return

    def sub(self, repl, string, count=0, flags=0):
        """Return only the substituted string produced by subn()."""
        result, _ = self.subn(repl, string, count, flags)
        return result

    def subn(self, repl, string, count=0, flags=0):
        """Replace matches in string and return (new_string, replacements).

        repl is either a callable taking the match object, or a template that
        is passed to the match object's expand().  A positive count limits
        the number of replacements; 0 means no limit.
        """
        if hasattr(repl, '__call__'):
            replace = repl
        else:
            def replace(found, template=repl):
                return found.expand(template)
        pieces = []
        cursor = 0
        done = 0
        for found in self.finditer(string, flags=flags):
            begin, stop = found.span()
            # An empty match sitting exactly where the previous replacement
            # ended is skipped, except at the very start of the string.
            if cursor == begin == stop and cursor != 0:
                continue
            pieces.append(string[cursor:begin])
            pieces.append(replace(found))
            cursor = stop
            done += 1
            if count > 0 and done >= count:
                break
        pieces.append(string[cursor:])
        # Join with an empty slice of the input so the separator has the
        # same type as string.
        return (string[:0].join(pieces), done)

    def __reduce__(self):
        """Pickle support.

        A pattern whose source is unavailable (pattern is None) is rebuilt
        from its dumps() data; any other pattern is rebuilt from its pattern
        and flags.
        """
        if self.pattern is not None:
            return (Pattern, (self.pattern, self.flags))
        return (Pattern, (None, 0, self.dumps()))

    def __repr__(self):
        """Return an expression-like description of the pattern."""
        if self.pattern is None:
            return '{0}.loads({1})'.format(__name__, repr(self.dumps()))
        remaining = self.flags
        if not remaining:
            return '{0}.compile({1})'.format(__name__, repr(self.pattern))
        parts = []
        for name in _FLAGS:
            bit = getattr(_pcre, name)
            if remaining & bit:
                parts.append('{0}.{1}'.format(__name__, name))
                remaining &= ~bit
        if remaining:
            # Bits that do not correspond to any named flag are shown in hex.
            parts.append(hex(remaining))
        return '{0}.compile({1}, {2})'.format(__name__, repr(self.pattern), '|'.join(parts))
119
class Match(_pcre.Match):
    """Match object whose expand() takes str.format()-style templates."""

    def expand(self, template):
        """Fill template using str.format().

        Positional field 0 is the whole match, the following positional
        fields are the values of groups('') and the keyword fields are the
        entries of groupdict('').
        """
        whole = self.group()
        numbered = self.groups('')
        named = self.groupdict('')
        return template.format(whole, *numbered, **named)

    def __repr__(self):
        """Return a description showing the class, the span and the match."""
        klass = self.__class__
        span_text = repr(self.span())
        match_text = repr(self.group())
        return '<{0}.{1} object; span={2}, match={3}>'.format(
            klass.__module__, klass.__name__, span_text, match_text)
128
class REMatch(Match):
    r"""Match variant whose expand() takes re-style templates.

    Templates use backslash escapes and ``\1`` / ``\g<name>`` group
    references instead of str.format() fields.
    """

    def expand(self, template):
        r"""Return template with escapes and group references substituted.

        Escape sequences (``\\``, ``\n``, octal escapes, ...) are replaced by
        the character they denote; ``\N`` and ``\g<N>`` are replaced by group
        number N and ``\g<name>`` by the named group.

        Raises:
            PCREError: code 100 for a malformed ``\g`` reference, code 15 for
                a group number that does not exist, code 101 when the
                referenced group did not take part in the match.
            IndexError: when a named group is unknown.
        """
        groups = (self.group(),) + self.groups()
        groupdict = self.groupdict()

        def unescape(esc):
            # Python 2: str and unicode both offer .decode() and the
            # 'string-escape' codec; this path is the original behaviour.
            try:
                return ('\\' + esc).decode('string-escape')
            except (AttributeError, TypeError, LookupError):
                pass
            # Python 3: str has no .decode() and 'string-escape' is gone.
            # esc only contains ASCII characters, so 'unicode_escape' yields
            # the same character for the escapes matched by the template
            # regex.
            if isinstance(esc, bytes):
                # latin-1 maps code points 0-255 back to single bytes.
                return (b'\\' + esc).decode('unicode_escape').encode('latin-1')
            return ('\\' + esc).encode('ascii').decode('unicode_escape')

        def repl(match):
            esc, index, group, badgroup = match.groups()
            if esc:
                return unescape(esc)
            if badgroup:
                raise PCREError(100, 'invalid group name')
            try:
                # \N and \g<N> address groups by number, \g<name> by name.
                if index or group.isdigit():
                    result = groups[int(index or group)]
                else:
                    result = groupdict[group]
            except IndexError:
                raise PCREError(15, 'invalid group reference')
            except KeyError:
                raise IndexError('unknown group name')
            if result is None:
                raise PCREError(101, 'unmatched group')
            return result

        return _REGEX_RE_TEMPLATE.sub(repl, template)
152
def compile(pattern, flags=0):
    """Return a Pattern for pattern.

    An object that already is a _pcre.Pattern is returned as is; passing
    non-zero flags together with such an object raises ValueError.
    """
    if not isinstance(pattern, _pcre.Pattern):
        return Pattern(pattern, flags)
    if flags != 0:
        raise ValueError('cannot process flags argument with a compiled pattern')
    return pattern
159
def match(pattern, string, flags=0):
    """Compile pattern with flags and return the result of its match()."""
    compiled = compile(pattern, flags)
    return compiled.match(string)
162
def search(pattern, string, flags=0):
    """Compile pattern with flags and return the result of its search()."""
    compiled = compile(pattern, flags)
    return compiled.search(string)
165
def split(pattern, string, maxsplit=0, flags=0):
    """Compile pattern with flags and split string with it.

    maxsplit is forwarded to Pattern.split().
    """
    compiled = compile(pattern, flags)
    return compiled.split(string, maxsplit)
168
def findall(pattern, string, flags=0):
    """Compile pattern with flags and return the result of its findall()."""
    compiled = compile(pattern, flags)
    return compiled.findall(string)
171
def finditer(pattern, string, flags=0):
    """Compile pattern with flags and return the result of its finditer()."""
    compiled = compile(pattern, flags)
    return compiled.finditer(string)
174
def sub(pattern, repl, string, count=0, flags=0):
    """Compile pattern with flags and return the result of its sub().

    repl, string and count are forwarded to Pattern.sub().
    """
    compiled = compile(pattern, flags)
    return compiled.sub(repl, string, count)
177
def subn(pattern, repl, string, count=0, flags=0):
    """Compile pattern with flags and return the result of its subn().

    repl, string and count are forwarded to Pattern.subn().
    """
    compiled = compile(pattern, flags)
    return compiled.subn(repl, string, count)
180
def loads(data):
    """Load a pattern serialized with Pattern.dumps()."""
    restored = Pattern(None, loads=data)
    return restored
184
def escape(pattern):
    """Escape a regular expression.

    Every character that is not in _ALNUM is prefixed with a backslash; the
    NUL character becomes the four characters backslash, 0, 0, 0.  The result
    is joined with an empty slice of pattern, so it has the same type.
    """
    safe = _ALNUM
    escaped = [
        char if char in safe
        else ('\\000' if char == '\000' else ('\\' + char))
        for char in pattern
    ]
    return pattern[:0].join(escaped)
193
def escape_template(template):
    """Escape "{" and "}" characters in the template by doubling them."""
    opened = template.replace('{', '{{')
    return opened.replace('}', '}}')
197
def convert_re_template(template):
    r"""Convert an re template such as ``\1\g<id>`` to ``{1}{id}`` format.

    Literal "{" and "}" are doubled first (escape_template).  Escape
    sequences are then replaced by the character they denote and group
    references by the matching str.format() field.

    Raises:
        PCREError: code 100 for a malformed ``\g`` reference.
    """
    def unescape(esc):
        # Python 2: str and unicode both offer .decode() and the
        # 'string-escape' codec; this path is the original behaviour.
        try:
            return ('\\' + esc).decode('string-escape')
        except (AttributeError, TypeError, LookupError):
            pass
        # Python 3: str has no .decode() and 'string-escape' is gone.  esc
        # only contains ASCII characters, so 'unicode_escape' yields the same
        # character for the escapes matched by the template regex.
        if isinstance(esc, bytes):
            # latin-1 maps code points 0-255 back to single bytes.
            return (b'\\' + esc).decode('unicode_escape').encode('latin-1')
        return ('\\' + esc).encode('ascii').decode('unicode_escape')

    def repl(match):
        esc, index, group, badgroup = match.groups()
        if esc:
            return unescape(esc)
        if badgroup:
            raise PCREError(100, 'invalid group name')
        # Exactly one of index / group is set for a valid group reference.
        return '{%s}' % (index or group)

    return _REGEX_RE_TEMPLATE.sub(repl, escape_template(template))
208
def enable_re_template_mode():
    """Make calls to sub() take re templates instead of str.format() templates.

    Rebinds the module-level name Match to REMatch; Pattern methods look
    Match up at call time, so matches created afterwards are REMatch objects.
    """
    globals()['Match'] = REMatch
213
# Characters that escape() leaves untouched: ASCII letters and digits.
_ALNUM = frozenset('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890')
# Exception types re-exported from the extension module; "error" is the name
# the standard re module uses for its exception.
error = PCREError = _pcre.PCREError
# Raised by Match() when the pattern does not match; the Pattern methods
# above catch it.
NoMatch = _pcre.NoMatch
# NOTE(review): presumably the upper bound for repeat counts, mirroring the
# MAXREPEAT constant of the standard regex engine - confirm.
MAXREPEAT = 65536

# Provides PCRE build-time configuration.
# The mapping returned by _pcre.get_config() becomes the namespace of a new
# class, so every entry is readable as an attribute of config.
config = type('config', (), _pcre.get_config())
221
# Names of the pattern and/or match flags exported by this module.
_FLAGS = (
    'IGNORECASE', 'MULTILINE', 'DOTALL', 'UNICODE', 'VERBOSE',
    'ANCHORED', 'NOTBOL', 'NOTEOL', 'NOTEMPTY', 'NOTEMPTY_ATSTART',
    'UTF8', 'NO_UTF8_CHECK',
)

# Re-export every flag from the _pcre module under the same name.  The
# generator keeps its loop variable local, so no helper names are left behind
# in the module namespace.
globals().update((_flag, getattr(_pcre, _flag)) for _flag in _FLAGS)
232
# Short versions
# (single-letter aliases, named like the ones in the standard re module)
I = IGNORECASE
M = MULTILINE
S = DOTALL
U = UNICODE
X = VERBOSE

# Study flags
# NOTE(review): presumably passed when studying a pattern to request JIT
# compilation - confirm against the _pcre extension.
STUDY_JIT = _pcre.STUDY_JIT
242
# Used to parse re templates.  After a literal backslash the alternatives
# capture, in this order:
#   group 1 - an escape sequence: one of \ a b f n r t v, or an octal escape
#             ("0" followed by up to two octal digits, or three octal digits)
#   group 2 - a one- or two-digit group number (\1 ... \99)
#   group 3 - the number or name inside a well-formed \g<...> reference
#   group 4 - any other text starting with "g" (up to the next ">"); the
#             template code in this module reports it as an invalid group name
_REGEX_RE_TEMPLATE = compile(r'\\(?:([\\abfnrtv]|0[0-7]{0,2}|[0-7]{3})|'
                             r'(\d{1,2})|g<(\d+|[^\d\W]\w*)>|(g[^>]*))')
246