1# -*- coding: utf-8 -*-
2"""
3    pygments.scanner
4    ~~~~~~~~~~~~~~~~
5
6    This library implements a regex based scanner. Some languages
7    like Pascal are easy to parse but have some keywords that
8    depend on the context. Because of this it's impossible to lex
9    that just by using a regular expression lexer like the
10    `RegexLexer`.
11
12    Have a look at the `DelphiLexer` to get an idea of how to use
13    this scanner.
14
15    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
16    :license: BSD, see LICENSE for details.
17"""
18import re
19
20
class EndOfText(RuntimeError):
    """
    Signals that a match function was called although the
    scanner had already reached the end of the text.
    """
26
27
class Scanner:
    """
    Simple regex based scanner that walks over a text.

    All method patterns are regular expression strings (not
    compiled expressions!). Every pattern is compiled once with
    the scanner's default flags and cached on the instance.
    """

    def __init__(self, text, flags=0):
        """
        :param text:    The text which should be scanned
        :param flags:   default regular expression flags
        """
        self.data = text
        self.data_length = len(text)
        # Start and end offset of the most recent successful scan;
        # ``pos`` is also where the next pattern is applied.
        self.start_pos = 0
        self.pos = 0
        self.flags = flags
        # Matched text of the previous and of the most recent
        # successful scan.
        self.last = None
        self.match = None
        # Maps pattern strings to their compiled regular expressions.
        self._re_cache = {}

    @property
    def eos(self):
        """`True` if the scanner reached the end of text."""
        return self.pos >= self.data_length

    def _compile(self, pattern):
        """
        Return the compiled regular expression for `pattern`,
        compiling it with the default flags on first use.
        """
        try:
            return self._re_cache[pattern]
        except KeyError:
            regex = self._re_cache[pattern] = re.compile(pattern, self.flags)
            return regex

    def check(self, pattern):
        """
        Apply `pattern` on the current position and return
        the match object, or `None` if it does not match.
        (Doesn't touch pos). Use this for lookahead.

        Raises `EndOfText` if the end of the text was reached.
        """
        if self.eos:
            raise EndOfText()
        return self._compile(pattern).match(self.data, self.pos)

    def test(self, pattern):
        """Apply a pattern on the current position and check
        if it matches. Doesn't touch pos.

        Raises `EndOfText` if the end of the text was reached.
        """
        return self.check(pattern) is not None

    def scan(self, pattern):
        """
        Scan the text for the given pattern and update pos/match
        and related fields. The return value is a boolean that
        indicates if the pattern matched. The matched value is
        stored on the instance as ``match``, the previously matched
        value is stored as ``last``. ``start_pos`` is the position
        of the pointer before the pattern was matched, ``pos`` is
        the end position.

        If the pattern does not match, no field is modified.

        Raises `EndOfText` if the end of the text was reached.
        """
        if self.eos:
            raise EndOfText()
        m = self._compile(pattern).match(self.data, self.pos)
        if m is None:
            return False
        # Shift the history only on success so that a failed attempt
        # does not overwrite ``last`` with the current ``match``.
        self.last = self.match
        self.start_pos = m.start()
        self.pos = m.end()
        self.match = m.group()
        return True

    def get_char(self):
        """Scan exactly one char, whatever it is (newlines included).

        Raises `EndOfText` if the end of the text was reached.
        """
        # The inline ``(?s)`` flag lets ``.`` match a newline even if
        # the default flags do not contain ``re.DOTALL``.
        self.scan('(?s).')

    def __repr__(self):
        return '<%s %d/%d>' % (
            self.__class__.__name__,
            self.pos,
            self.data_length
        )
106