1#
2# Copyright (C) 2010-2017 Samuel Abels
3# The MIT License (MIT)
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files
7# (the "Software"), to deal in the Software without restriction,
8# including without limitation the rights to use, copy, modify, merge,
9# publish, distribute, sublicense, and/or sell copies of the Software,
10# and to permit persons to whom the Software is furnished to do so,
11# subject to the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23from __future__ import print_function, absolute_import
24import re
25from .string import String
26
# Matches every opening parenthesis that is not preceded by a backslash
# and is not immediately followed by "?:" or "?<".
# NOTE: re.I has no effect on this pattern (it contains no letters).
bracket_re = re.compile(r'(?<!\\)\((?!\?[:<])', re.I)

# Grammar used while reading the modifiers that follow a regular
# expression, as (token type, pattern) pairs. "i" is the only modifier;
# any other single character is reported as "invalid_char".
modifier_grammar = (
    ('modifier',     r'[i]'),
    ('invalid_char', r'.'),
)

# The same grammar with every pattern compiled (re.M | re.S).
modifier_grammar_c = [
    (token_type, re.compile(pattern, re.M | re.S))
    for token_type, pattern in modifier_grammar
]
39
40
class Regex(String):

    """
    A regular expression literal, optionally followed by modifiers.

    The pattern text is collected by the String base class. This class
    additionally reads the trailing modifiers (only "i" is defined in
    the modifier grammar), validates the pattern with the ``re`` module
    and records the number of capturing groups in ``n_groups``.
    """

    def __init__(self, lexer, parser, parent):
        """
        Parse a regular expression at the lexer's current position.

        Sets ``delimiter``, ``n_groups`` and ``flags``. A pattern that
        the ``re`` module rejects is reported via lexer.syntax_error().

        :param lexer: The lexer; the text of its current token is
                      stored as the delimiter.
        :param parser: Passed through to String.__init__().
        :param parent: Passed through to String.__init__().
        """
        # Read the delimiter before handing the lexer to String.__init__();
        # _escape() compares against it (presumably while String collects
        # the pattern -- verify against the String implementation).
        self.delimiter = lexer.token()[1]
        # String parser collects the regex.
        String.__init__(self, lexer, parser, parent)
        # Heuristic group count. It is only a fallback so the attribute
        # always exists; the exact count is taken from the compiled
        # pattern further down.
        self.n_groups = len(bracket_re.findall(self.string))
        self.flags = 0

        # Collect modifiers.
        lexer.set_grammar(modifier_grammar_c)
        while lexer.current_is('modifier'):
            if lexer.next_if('modifier', 'i'):
                self.flags = self.flags | re.I
            else:
                modifier = lexer.token()[1]
                error = 'Invalid regular expression modifier "%s"' % modifier
                lexer.syntax_error(error, self)
        lexer.restore_grammar()

        # Compile the regular expression to validate it. The broad except
        # is deliberate: re.compile() may raise more than re.error (for
        # example OverflowError for an oversized repetition count), and
        # every failure is reported as a syntax error of the template.
        try:
            compiled = re.compile(self.string, self.flags)
        except Exception as e:
            error = 'Invalid regular expression %s: %s' % (
                repr(self.string), e)
            lexer.syntax_error(error, self)
        else:
            # Counting "(" textually miscounts lookaheads such as (?=...)
            # and (?!...), inline flags like (?i), "(" inside a character
            # class, and groups that follow an escaped backslash. The
            # compiled pattern knows the exact number of capturing groups.
            self.n_groups = compiled.groups

    def _escape(self, token):
        """
        Return the text to use for the given escape sequence.

        An escaped delimiter is reduced to the bare delimiter; any other
        escape sequence is returned unchanged.

        :param token: The escape sequence; its second element is the
                      escaped character.
        :return: The delimiter character, or the unmodified token.
        """
        char = token[1]
        if char == self.delimiter:
            return char
        return token

    def value(self, context):
        """
        Return the compiled regular expression.

        The pattern is the first element of the result of
        String.value(), compiled with the collected modifier flags.

        :param context: Passed through to String.value().
        :return: A compiled regular expression object.
        """
        pattern = String.value(self, context)[0]
        return re.compile(pattern, self.flags)

    def dump(self, indent=0):
        """
        Print the token name and the raw pattern.

        :param indent: Number of spaces to print before the name.
        """
        print((' ' * indent) + self.name, self.string)
81