# -*- coding: utf-8 -*-
# Copyright (C) 2017      by Juancarlo Añez
# Copyright (C) 2012-2016 by Juancarlo Añez and Thomas Bragg
"""
The Buffer class provides the functionality required by a parser-driven lexer.

Line analysis and caching are done so the parser can freely move with goto(p)
to any position in the parsed text, and still recover accurate information
about source lines and content.
"""
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

import os
from itertools import takewhile, repeat

from grako.util import identity, imap, ustr, strtype
from grako.util import extend_list, contains_sublist
from grako.util import re as regexp
from grako.util import WHITESPACE_RE, RE_FLAGS
from grako.exceptions import ParseError
from grako.infos import PosLine, LineIndexInfo, LineInfo, CommentInfo

RETYPE = type(regexp.compile('.'))

# for backwards compatibility with existing parsers
LineIndexEntry = LineIndexInfo


class Buffer(object):
    def __init__(self,
                 text,
                 filename=None,
                 whitespace=None,
                 comments_re=None,
                 eol_comments_re=None,
                 ignorecase=False,
                 nameguard=None,
                 comment_recovery=False,
                 namechars='',
                 **kwargs):
        text = ustr(text)
        self.text = self.original_text = text
        self.filename = filename or ''

        self.whitespace = whitespace

        self.comments_re = comments_re
        self.eol_comments_re = eol_comments_re
        self.ignorecase = ignorecase
        # nameguard defaults to True whenever whitespace skipping is enabled
        self.nameguard = (nameguard
                          if nameguard is not None
                          else bool(self.whitespace_re))
        self.comment_recovery = comment_recovery
        self.namechars = namechars
        self._namechar_set = set(namechars)
        if namechars:
            # extra name characters imply nameguard semantics
            self.nameguard = True

        self._pos = 0
        self._len = 0
        self._linecount = 0
        self._lines = []
        self._line_index = []
        self._line_cache = []
        self._comment_index = []
        self._re_cache = {}

        self._preprocess()
        self._postprocess()

    @property
    def whitespace(self):
        return self._whitespace

    @whitespace.setter
    def whitespace(self, value):
        self._whitespace = value
        self.whitespace_re = self.build_whitespace_re(value)

    @staticmethod
    def build_whitespace_re(whitespace):
        if whitespace is None:
            return WHITESPACE_RE
        elif isinstance(whitespace, RETYPE):
            return whitespace
        elif whitespace:
            if not isinstance(whitespace, strtype):
                # a list or a set?
                whitespace = ''.join(whitespace)
            return regexp.compile(
                '[%s]+' % regexp.escape(whitespace), RE_FLAGS
            )
        else:
            return None

    def _preprocess(self, *args, **kwargs):
        lines, index = self._preprocess_block(self.filename, self.text)
        self._lines = lines
        self._line_index = index
        self.text = self.join_block_lines(lines)

    def _postprocess(self):
        cache, count = PosLine.build_line_cache(self._lines)
        self._line_cache = cache
        self._linecount = count
        self._len = len(self.text)

    def _preprocess_block(self, name, block, **kwargs):
        lines = self.split_block_lines(block)
        index = LineIndexInfo.block_index(name, len(lines))
        return self.process_block(name, lines, index, **kwargs)

    def split_block_lines(self, block):
        return block.splitlines(True)

    def join_block_lines(self, lines):
        return ''.join(lines)

    def process_block(self, name, lines, index, **kwargs):
        return lines, index

    def include(self, lines, index, i, j, name, block, **kwargs):
        blines, bindex = self._preprocess_block(name, block, **kwargs)
        assert len(blines) == len(bindex)
        # splice the included block's lines and index entries into the slice [i:j]
        lines[i:j] = blines
        index[i:j] = bindex
        assert len(lines) == len(index)
        return j + len(blines) - 1

    def include_file(self, source, name, lines, index, i, j):
        text, filename = self.get_include(source, name)
        return self.include(lines, index, i, j, filename, text)

    def get_include(self, source, filename):
        # resolve the include path relative to the including source file
        source = os.path.abspath(source)
        base = os.path.dirname(source)
        include = os.path.join(base, filename)
        try:
            with open(include) as f:
                return f.read(), include
        except IOError:
            raise ParseError('include not found: %s' % include)

    def replace_lines(self, i, j, name, block):
        lines = self.split_block_lines(self.text)
        index = list(self._line_index)

        endline = self.include(lines, index, i, j, name, block)

        self.text = self.join_block_lines(lines)
        self._line_index = index
        self._postprocess()

        newtext = self.join_block_lines(lines[j + 1:endline + 2])
        return endline, newtext

    @property
    def pos(self):
        return self._pos

    @pos.setter
    def pos(self, p):
        self.goto(p)

    @property
    def line(self):
        return self.posline()

    @property
    def col(self):
        return self.poscol()

    def posline(self, pos=None):
        if pos is None:
            pos = self._pos
        return self._line_cache[pos].line

    def poscol(self, pos=None):
        if pos is None:
            pos = self._pos
        start = self._line_cache[pos].start
        return pos - start

    def atend(self):
        return self._pos >= self._len

    def ateol(self):
        return self.atend() or self.current() in '\r\n'

    def current(self):
        if self._pos >= self._len:
            return None
        return self.text[self._pos]

    def at(self, p):
        if p >= self._len:
            return None
        return self.text[p]

    def peek(self, n=1):
        return self.at(self._pos + n)

    def next(self):
        if self._pos >= self._len:
            return None
        c = self.text[self._pos]
        self._pos += 1
        return c

    def goto(self, p):
        self._pos = max(0, min(len(self.text), p))

    def move(self, n):
        self.goto(self.pos + n)

    def comments(self, p, clear=False):
        if not self.comment_recovery or not self._comment_index:
            return CommentInfo([], [])

        n = self.posline(p)
        if n >= len(self._comment_index):
            return CommentInfo([], [])

        eolcmm = []
        if n < len(self._comment_index):
            eolcmm = self._comment_index[n].eol
            if clear:
                self._comment_index[n].eol = []

        # collect inline comments from this line and the consecutive lines above it
        cmm = []
        while n >= 0 and self._comment_index[n].inline:
            cmm.insert(0, self._comment_index[n].inline)
            if clear:
                self._comment_index[n].inline = []
            n -= 1

        return CommentInfo(cmm, eolcmm)

    def _index_comments(self, comments, selector):
        if comments and self.comment_recovery:
            n = self.line
            extend_list(self._comment_index, n, default=CommentInfo.new_comment)
            previous = selector(self._comment_index[n])
            if not contains_sublist(previous, comments):  # FIXME: will discard repeated comments
                previous.extend(comments)

    def _eat_regex(self, regex):
        # consume consecutive matches of the regex, returning the matched text
        if regex is not None:
            return list(takewhile(identity, imap(self.matchre, repeat(regex))))

    def eat_whitespace(self):
        return self._eat_regex(self.whitespace_re)

    def eat_comments(self):
        comments = self._eat_regex(self.comments_re)
        self._index_comments(comments, lambda x: x.inline)

    def eat_eol_comments(self):
        comments = self._eat_regex(self.eol_comments_re)
        self._index_comments(comments, lambda x: x.eol)

    def next_token(self):
        # eat whitespace and comments until the position stops advancing
        p = None
        while self._pos != p:
            p = self._pos
            self.eat_eol_comments()
            self.eat_comments()
            self.eat_whitespace()

    def skip_to(self, c):
        p = self._pos
        le = self._len
        while p < le and self.text[p] != c:
            p += 1
        self.goto(p)
        return self.pos

    def skip_past(self, c):
        self.skip_to(c)
        self.next()
        return self.pos

    def skip_to_eol(self):
        return self.skip_to('\n')

    def scan_space(self, offset=0):
        return (
            self.whitespace_re and
            self._scanre(self.whitespace_re, offset=offset) is not None
        )

    def is_space(self):
        return self.scan_space()

    def is_name_char(self, c):
        return c is not None and (c.isalnum() or c in self._namechar_set)

    def match(self, token, ignorecase=None):
        ignorecase = ignorecase if ignorecase is not None else self.ignorecase

        if token is None:
            return self.atend()

        p = self.pos
        if ignorecase:
            is_match = self.text[p:p + len(token)].lower() == token.lower()
        else:
            is_match = self.text[p:p + len(token)] == token

        if is_match:
            self.move(len(token))
            if not self.nameguard:
                return token

            # with nameguard on, a name-like token must not be followed by
            # another name character (that would split a longer identifier)
            partial_match = (
                token.isalnum() and
                token[0].isalpha() and
                self.is_name_char(self.current())
            )
            if not partial_match:
                return token
        self.goto(p)

    def matchre(self, pattern, ignorecase=None):
        matched = self._scanre(pattern, ignorecase=ignorecase)
        if matched:
            token = matched.group()
            self.move(len(token))
            return token

    def _scanre(self, pattern, ignorecase=None, offset=0):
        ignorecase = ignorecase if ignorecase is not None else self.ignorecase

        if isinstance(pattern, RETYPE):
            re = pattern
        elif pattern in self._re_cache:
            re = self._re_cache[pattern]
        else:
            # compile string patterns once and cache them for reuse
            flags = RE_FLAGS | (regexp.IGNORECASE if ignorecase else 0)
            re = regexp.compile(pattern, flags)
            self._re_cache[pattern] = re
        return re.match(self.text, self.pos + offset)

    @property
    def linecount(self):
        return self._linecount

    def line_info(self, pos=None):
        if pos is None:
            pos = self._pos

        if pos >= len(self._line_cache):
            return LineInfo(self.filename, self.linecount, 0, self._len, self._len, '')

        start, line, length = self._line_cache[pos]
        end = start + length
        col = pos - start

        text = self.text[start:end]
        n = min(len(self._line_index) - 1, line)
        filename, line = self._line_index[n]

        return LineInfo(filename, line, col, start, end, text)

    def lookahead(self):
        if self.atend():
            return ''
        info = self.line_info()
        text = info.text[info.col:info.col + 1 + 80]
        text = self.split_block_lines(text)[0].rstrip()
        return '<%d:%d>%s' % (info.line + 1, info.col + 1, text)

    def get_line(self, n=None):
        if n is None:
            n = self.line
        return self._lines[n]

    def get_lines(self, start=None, end=None):
        if start is None:
            start = 0
        if end is None:
            end = len(self._lines)
        return self._lines[start:end + 1]

    def line_index(self, start=0, end=None):
        if end is None:
            end = len(self._line_index)
        return self._line_index[start:1 + end]

    def __repr__(self):
        return '%s@%d' % (type(self).__name__, self.pos)

    def __json__(self):
        return None