1#=======================================================================
2#
3#   Python Lexical Analyser
4#
5#   Traditional Regular Expression Syntax
6#
7#=======================================================================
8
9from __future__ import absolute_import
10
11from .Regexps import Alt, Seq, Rep, Rep1, Opt, Any, AnyBut, Bol, Eol, Char
12from .Errors import PlexError
13
14
class RegexpSyntaxError(PlexError):
    """Raised when a traditional-syntax regexp string cannot be parsed."""
17
18
def re(s):
    """
    Translate |s|, a regular expression written in the traditional
    string notation, into the equivalent Plex regexp object.

    The work is delegated to a fresh REParser built over |s|; whatever
    its parse_re() method returns is returned unchanged.
    """
    parser = REParser(s)
    return parser.parse_re()
25
26
class REParser(object):
    """
    Recursive-descent parser for traditional regular expression strings.

    Grammar handled by the parse_* methods:

        alt     ::= seq ('|' seq)*
        seq     ::= mod*
        mod     ::= prim ('*' | '+' | '?')*
        prim    ::= '.' | '^' | '$' | '(' alt ')' | '[' charset ']'
                  | '\\' char | char

    Cursor state:
        s    -- the string being parsed
        i    -- index of the current character in |s|
        c    -- the current character, or '' once the input is exhausted
        end  -- 0 while characters remain, 1 after running off the end
    """

    def __init__(self, s):
        self.s = s
        # Start one position before the string so that the first next()
        # call lands on index 0 and initialises self.c.
        self.i = -1
        self.end = 0
        self.next()

    def parse_re(self):
        """Parse the whole string; any unconsumed input is an error."""
        tree = self.parse_alt()
        if not self.end:
            self.error("Unexpected %s" % repr(self.c))
        return tree

    def parse_alt(self):
        """Parse one or more sequences separated by '|'."""
        branches = [self.parse_seq()]
        while self.c == '|':
            self.next()
            branches.append(self.parse_seq())
        # A lone branch is returned as-is rather than wrapped in Alt.
        if len(branches) == 1:
            return branches[0]
        return Alt(*branches)

    def parse_seq(self):
        """Parse consecutive modified primitives up to '|', ')' or the end."""
        items = []
        while not (self.end or self.c in "|)"):
            items.append(self.parse_mod())
        return Seq(*items)

    def parse_mod(self):
        """Parse a primitive and apply any trailing *, + or ? modifiers."""
        node = self.parse_prim()
        while not self.end and self.c in "*+?":
            op = self.c
            # '*' -> Rep, '+' -> Rep1, otherwise ('?') -> Opt
            wrap = Rep if op == '*' else (Rep1 if op == '+' else Opt)
            node = wrap(node)
            self.next()
        return node

    def parse_prim(self):
        """Parse a single primitive regexp."""
        ch = self.get()
        if ch == '.':
            return AnyBut("\n")
        if ch == '^':
            return Bol
        if ch == '$':
            return Eol
        if ch == '(':
            group = self.parse_alt()
            self.expect(')')
            return group
        if ch == '[':
            charset = self.parse_charset()
            self.expect(']')
            return charset
        if ch == '\\':
            # A backslash makes the following character a literal.
            ch = self.get()
        return Char(ch)

    def parse_charset(self):
        """
        Parse the inside of a [...] character set.

        The opening '[' has already been consumed and the closing ']' is
        left for the caller to consume.
        """
        members = []
        negated = 0
        if self.c == '^':
            negated = 1
            self.next()
        # A ']' in first position is a literal member, not the terminator.
        if self.c == ']':
            members.append(']')
            self.next()
        while not self.end and self.c != ']':
            low = self.get()
            # '-' denotes a range unless it is the last char before ']'.
            if self.c == '-' and self.lookahead(1) != ']':
                self.next()
                high = self.get()
                members.extend(map(chr, range(ord(low), ord(high) + 1)))
            else:
                members.append(low)
        chars = ''.join(members)
        return AnyBut(chars) if negated else Any(chars)

    def next(self):
        """Move the cursor one character forward."""
        self.i += 1
        if self.i < len(self.s):
            self.c = self.s[self.i]
        else:
            # Ran off the end: expose an empty current char and set the flag.
            self.c = ''
            self.end = 1

    def get(self):
        """Return the current character and advance; error at end of input."""
        if self.end:
            self.error("Premature end of string")
        current = self.c
        self.next()
        return current

    def lookahead(self, n):
        """Return the char |n| positions past the cursor, or '' if none."""
        pos = self.i + n
        return self.s[pos] if pos < len(self.s) else ''

    def expect(self, c):
        """
        Consume character |c| if it is the current character;
        otherwise report a syntax error.
        """
        if self.c != c:
            self.error("Missing %s" % repr(c))
        else:
            self.next()

    def error(self, mess):
        """Raise RegexpSyntaxError describing |mess| at the current position."""
        detail = "Syntax error in regexp %s at position %d: %s" % (
            repr(self.s), self.i, mess)
        raise RegexpSyntaxError(detail)
156
157
158
159