1# Copyright (c) 2006-2021  Andrey Golovizin
2#
3# Permission is hereby granted, free of charge, to any person obtaining
4# a copy of this software and associated documentation files (the
5# "Software"), to deal in the Software without restriction, including
6# without limitation the rights to use, copy, modify, merge, publish,
7# distribute, sublicense, and/or sell copies of the Software, and to
8# permit persons to whom the Software is furnished to do so, subject to
9# the following conditions:
10#
11# The above copyright notice and this permission notice shall be
12# included in all copies or substantial portions of the Software.
13#
14# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
22"""Base parser class
23"""
24from __future__ import unicode_literals
25
26import re
27
28from pybtex.exceptions import PybtexError
29from pybtex import py3compat
30
31
class Token(object):
    """A piece of scanned text paired with the pattern that produced it."""

    def __init__(self, value, pattern):
        """Store the matched text (``value``) and its ``pattern``."""
        self.pattern = pattern
        self.value = value

    def __repr__(self):
        # A token is displayed as the repr of its text alone; the pattern
        # is not included.
        text = self.value
        return repr(text)
39
40
class Pattern(object):
    """A compiled regular expression with a human-readable description.

    The ``search``, ``match`` and ``findall`` attributes are the bound
    methods of the compiled regular expression.
    """

    def __init__(self, regexp, description, flags=0):
        """Compile ``regexp`` with ``flags`` and remember ``description``."""
        compiled = re.compile(regexp, flags=flags)
        # Expose the regex methods directly on the instance so callers can
        # use pattern.match(...) without an extra level of indirection.
        self.findall = compiled.findall
        self.match = compiled.match
        self.search = compiled.search
        self.description = description
48
49
class Literal(Pattern):
    """A pattern that matches one exact string."""

    def __init__(self, literal):
        """Build a pattern for ``literal``, described as the quoted literal."""
        # The description is the literal wrapped in single quotes.
        quoted = u"'{0}'".format(literal)
        # Escape the literal so regex metacharacters are matched verbatim.
        escaped = re.compile(re.escape(literal))
        super(Literal, self).__init__(escaped, quoted)
55
56
class Scanner(object):
    """Regular-expression based scanner over an in-memory text.

    ``pos`` is the offset of the next unread character in ``text`` and
    ``lineno`` is the 1-based number of the line containing ``pos``.
    A line ends at LF, CRLF or a lone CR (see ``NEWLINE``).
    """

    text = None
    lineno = 1
    pos = 0
    WHITESPACE = Pattern(r'\s+', 'whitespace')
    NEWLINE = Pattern(r'\n|(\r\n)|\r', 'newline')

    def __init__(self, text, filename=None):
        """Start scanning ``text``; ``filename`` is kept for error reports."""
        self.text = text
        self.end_pos = len(text)
        self.filename = filename

    def skip_to(self, patterns):
        """Consume text up to and including the earliest-ending match.

        Every pattern is searched for from the current position; the one
        whose match ends first wins (the first pattern wins ties).

        Returns a ``Token`` whose value is the text from the old position
        through the end of the winning match, or ``None`` (without moving)
        if no pattern matches.
        """
        end = None
        winning_pattern = None
        for pattern in patterns:
            match = pattern.search(self.text, self.pos)
            # Compare against None explicitly: a match ending at offset 0
            # is legitimate and must not be mistaken for "no match yet".
            if match and (end is None or match.end() < end):
                end = match.end()
                winning_pattern = pattern
        if winning_pattern is None:
            return None
        value = self.text[self.pos:end]
        self.pos = end
        self.update_lineno(value)
        return Token(value, winning_pattern)

    def update_lineno(self, value):
        """Advance ``lineno`` by the number of line breaks in ``value``."""
        # Each CRLF is counted once as "\n" and once as "\r", so subtract
        # the CRLF count to treat it as a single line break.
        num_newlines = value.count("\n") + value.count("\r") - value.count("\r\n")
        self.lineno += num_newlines

    def eat_whitespace(self):
        """Skip whitespace at the current position, tracking line breaks."""
        whitespace = self.WHITESPACE.match(self.text, self.pos)
        if whitespace:
            self.pos = whitespace.end()
            self.update_lineno(whitespace.group())

    def eof(self):
        """Return True when the current position is at the end of the text."""
        return self.pos == self.end_pos

    def get_token(self, patterns, allow_eof=False):
        """Skip whitespace and match one of ``patterns`` at the position.

        Patterns are tried in order and the first one that matches wins.

        Returns the matching ``Token``, or ``None`` (position left after the
        skipped whitespace) if no pattern matches.

        Raises ``EOFError`` at end of text when ``allow_eof`` is true, and
        ``PrematureEOF`` otherwise.
        """
        self.eat_whitespace()
        if self.eof():
            if allow_eof:
                raise EOFError
            raise PrematureEOF(self)
        for pattern in patterns:
            match = pattern.match(self.text, self.pos)
            if match:
                value = match.group()
                self.pos = match.end()
                # Keep lineno in step with pos even when the token itself
                # spans a line break.
                self.update_lineno(value)
                return Token(value, pattern)
        return None

    def optional(self, patterns, allow_eof=False):
        """Like ``get_token``: return a ``Token`` or ``None`` if none match."""
        return self.get_token(patterns, allow_eof=allow_eof)

    def required(self, patterns, description=None, allow_eof=False):
        """Like ``get_token``, but raise ``TokenRequired`` if nothing matches.

        ``description`` is used in the error message; when empty, the
        descriptions of ``patterns`` joined with ``' or '`` are used.
        """
        token = self.get_token(patterns, allow_eof=allow_eof)
        if token is not None:
            return token
        if not description:
            description = ' or '.join(pattern.description for pattern in patterns)
        raise TokenRequired(description, self)

    def get_error_context_info(self):
        """Return the current ``(lineno, pos)`` pair."""
        return self.lineno, self.pos

    def _split_lines(self):
        """Split the text into lines, keeping the line endings.

        Uses ``NEWLINE`` so that the result agrees with ``update_lineno``.
        ``str.splitlines`` would also break on form feeds and other
        separators that ``lineno`` does not count.
        """
        text = self.text
        find_newline = self.NEWLINE.search
        lines = []
        start = 0
        while True:
            newline = find_newline(text, start)
            if newline is None:
                break
            lines.append(text[start:newline.end()])
            start = newline.end()
        if start < len(text):
            # Last line without a terminating line break.
            lines.append(text[start:])
        return lines

    def get_error_context(self, context_info):
        """Return ``(context, lineno, colno)`` for a ``(lineno, pos)`` pair.

        ``context`` is the text of line ``lineno`` without its line ending
        and ``colno`` is ``pos`` minus the length of all preceding lines.
        If ``lineno`` is ``None``, both ``context`` and ``colno`` are
        ``None``. If ``lineno`` lies past the last line of the text,
        ``context`` is an empty string.
        """
        error_lineno, error_pos = context_info
        if error_lineno is None:
            return None, error_lineno, None
        line_index = error_lineno - 1
        lines = self._split_lines()
        chars_before = sum(len(line) for line in lines[:line_index])
        colno = error_pos - chars_before
        if line_index < len(lines):
            context = lines[line_index].rstrip('\r\n')
        else:
            # E.g. an error reported at end of text after a trailing newline.
            context = ''
        return context, error_lineno, colno

    def get_remainder(self):
        """Return the text from the current position to the end."""
        return self.text[self.pos:]
142
143
@py3compat.python_2_unicode_compatible
class PybtexSyntaxError(PybtexError):
    """Syntax error that records where the parser was when it was raised."""

    error_type = 'syntax error'

    def __init__(self, message, parser):
        """Capture the filename, line number and position from ``parser``."""
        super(PybtexSyntaxError, self).__init__(message, filename=parser.filename)
        self.parser = parser
        self.lineno = parser.lineno
        # Saved so the error context can be looked up later.
        self.error_context_info = parser.get_error_context_info()

    def __str__(self):
        """Format as ``<error_type>[ in line N]: <base message>``."""
        base_message = super(PybtexSyntaxError, self).__str__()
        # The line part is left out entirely when no line number is known.
        if self.lineno is None:
            pos = ''
        else:
            pos = u' in line {0}'.format(self.lineno)
        return u'{error_type}{pos}: {message}'.format(
            error_type=self.error_type,
            pos=pos,
            message=base_message,
        )
162
163
class PrematureEOF(PybtexSyntaxError):
    """Syntax error with the fixed message 'premature end of file'."""

    def __init__(self, parser):
        """Record the position of ``parser`` with the fixed message."""
        super(PrematureEOF, self).__init__('premature end of file', parser)
168
169
class TokenRequired(PybtexSyntaxError):
    """Syntax error with a message of the form '<description> expected'."""

    def __init__(self, description, parser):
        """Build the '<description> expected' message for ``parser``."""
        super(TokenRequired, self).__init__(
            u'{0} expected'.format(description), parser
        )

    def get_context(self):
        """Return the offending line with a caret marker line beneath it.

        Returns an empty string when the parser provides no context.
        """
        context, _lineno, colno = self.parser.get_error_context(
            self.error_context_info
        )
        if context is None:
            return ''
        # Two carets at column 0; otherwise three carets starting one
        # column before the error column.
        marker = '^^' if colno == 0 else ' ' * (colno - 1) + '^^^'
        return context + '\n' + marker
184