1# Copyright (c) 2006-2021 Andrey Golovizin 2# 3# Permission is hereby granted, free of charge, to any person obtaining 4# a copy of this software and associated documentation files (the 5# "Software"), to deal in the Software without restriction, including 6# without limitation the rights to use, copy, modify, merge, publish, 7# distribute, sublicense, and/or sell copies of the Software, and to 8# permit persons to whom the Software is furnished to do so, subject to 9# the following conditions: 10# 11# The above copyright notice and this permission notice shall be 12# included in all copies or substantial portions of the Software. 13# 14# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 17# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 18# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 19# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 20# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 22"""Base parser class 23""" 24from __future__ import unicode_literals 25 26import re 27 28from pybtex.exceptions import PybtexError 29from pybtex import py3compat 30 31 32class Token(object): 33 def __init__(self, value, pattern): 34 self.value = value 35 self.pattern = pattern 36 37 def __repr__(self): 38 return repr(self.value) 39 40 41class Pattern(object): 42 def __init__(self, regexp, description, flags=0): 43 self.description = description 44 compiled_regexp = re.compile(regexp, flags=flags) 45 self.search = compiled_regexp.search 46 self.match = compiled_regexp.match 47 self.findall = compiled_regexp.findall 48 49 50class Literal(Pattern): 51 def __init__(self, literal): 52 pattern = re.compile(re.escape(literal)) 53 description = u"'{0}'".format(literal) 54 super(Literal, self).__init__(pattern, description) 55 56 57class Scanner(object): 58 text = None 59 lineno = 1 60 pos = 0 61 WHITESPACE = Pattern(r'\s+', 'whitespace') 62 NEWLINE = Pattern(r'\n|(\r\n)|\r', 'newline') 63 64 def __init__(self, text, filename=None): 65 self.text = text 66 self.end_pos = len(text) 67 self.filename = filename 68 69 def skip_to(self, patterns): 70 end = None 71 winning_pattern = None 72 for pattern in patterns: 73 match = pattern.search(self.text, self.pos) 74 if match and (not end or match.end() < end): 75 end = match.end() 76 winning_pattern = pattern 77 if winning_pattern: 78 value = self.text[self.pos: end] 79 self.pos = end 80 # print '>>', value 81 self.update_lineno(value) 82 return Token(value, winning_pattern) 83 84 def update_lineno(self, value): 85 num_newlines = value.count("\n") + value.count("\r") - value.count("\r\n") 86 self.lineno += num_newlines 87 88 def eat_whitespace(self): 89 whitespace = self.WHITESPACE.match(self.text, self.pos) 90 if whitespace: 91 self.pos = whitespace.end() 92 self.update_lineno(whitespace.group()) 93 94 def eof(self): 95 return self.pos == self.end_pos 96 97 def get_token(self, patterns, allow_eof=False): 98 self.eat_whitespace() 99 if self.eof(): 100 if allow_eof: 101 raise EOFError 102 else: 103 raise PrematureEOF(self) 104 for pattern in patterns: 105 match = pattern.match(self.text, self.pos) 106 if match: 107 value = match.group() 108 self.pos = match.end() 109 # print '->', value 110 return Token(value, pattern) 111 112 def optional(self, patterns, allow_eof=False): 113 return self.get_token(patterns, allow_eof=allow_eof) 114 115 def required(self, patterns, description=None, allow_eof=False): 116 token = self.get_token(patterns, allow_eof=allow_eof) 117 if token is None: 118 if not description: 119 description = ' or '.join(pattern.description for pattern in patterns) 120 raise TokenRequired(description, self) 121 else: 122 return token 123 124 def get_error_context_info(self): 125 return self.lineno, self.pos 126 127 def get_error_context(self, context_info): 128 error_lineno, error_pos = context_info 129 if error_lineno is not None: 130 error_lineno0 = error_lineno - 1 131 lines = self.text.splitlines(True) 132 before_error = ''.join(lines[:error_lineno0]) 133 colno = error_pos - len(before_error) 134 context = lines[error_lineno0].rstrip('\r\n') 135 else: 136 colno = None 137 context = None 138 return context, error_lineno, colno 139 140 def get_remainder(self): 141 return self.text[self.pos:] 142 143 144@py3compat.python_2_unicode_compatible 145class PybtexSyntaxError(PybtexError): 146 error_type = 'syntax error' 147 148 def __init__(self, message, parser): 149 super(PybtexSyntaxError, self).__init__(message, filename=parser.filename) 150 self.lineno = parser.lineno 151 self.parser = parser 152 self.error_context_info = parser.get_error_context_info() 153 154 def __str__(self): 155 base_message = super(PybtexSyntaxError, self).__str__() 156 pos = u' in line {0}'.format(self.lineno) if self.lineno is not None else '' 157 return u'{error_type}{pos}: {message}'.format( 158 error_type=self.error_type, 159 pos=pos, 160 message=base_message, 161 ) 162 163 164class PrematureEOF(PybtexSyntaxError): 165 def __init__(self, parser): 166 message = 'premature end of file' 167 super(PrematureEOF, self).__init__(message, parser) 168 169 170class TokenRequired(PybtexSyntaxError): 171 def __init__(self, description, parser): 172 message = u'{0} expected'.format(description) 173 super(TokenRequired, self).__init__(message, parser) 174 175 def get_context(self): 176 context, lineno, colno = self.parser.get_error_context(self.error_context_info) 177 if context is None: 178 return '' 179 if colno == 0: 180 marker = '^^' 181 else: 182 marker = ' ' * (colno - 1) + '^^^' 183 return '\n'.join((context, marker)) 184