1#=======================================================================
2#
3#   Python Lexical Analyser
4#
5#   Lexical Analyser Specification
6#
7#=======================================================================
8
9from __future__ import absolute_import
10
11import types
12
13from . import Actions
14from . import DFA
15from . import Errors
16from . import Machines
17from . import Regexps
18
# debug_flags bit values for the Lexicon constructor: bit 1 dumps the
# constructed NFA and bit 2 dumps the resulting DFA to the debug file.
DUMP_NFA = 1
DUMP_DFA = 2
22
23
class State(object):
    """
    Marks a user-defined scanner state inside a Plex.Lexicon
    specification.  Token definitions listed in |tokens| are only
    recognised while the scanner is in the state called |name|.

    Constructor:

       State(name, token_specifications)
    """

    name = None     # state name (string)
    tokens = None   # list of (pattern, action) token definitions

    def __init__(self, name, tokens):
        self.name, self.tokens = name, tokens
40
41
class Lexicon(object):
    """
    Lexicon(specification) builds a lexical analyser from the given
    |specification|. The specification consists of a list of
    specification items. Each specification item may be either:

       1) A token definition, which is a tuple:

             (pattern, action)

          The |pattern| is a regular expression built using the
          constructors defined in the Plex module.

          The |action| is the action to be performed when this pattern
          is recognised (see below).

       2) A state definition:

             State(name, tokens)

          where |name| is a character string naming the state,
          and |tokens| is a list of token definitions as
          above. The meaning and usage of states is described
          below.

    Actions
    -------

    The |action| in a token specification may be one of three things:

       1) A function, which is called as follows:

             function(scanner, text)

          where |scanner| is the relevant Scanner instance, and |text|
          is the matched text. If the function returns anything
          other than None, that value is returned as the value of the
          token. If it returns None, scanning continues as if the IGNORE
          action were specified (see below).

        2) One of the following special actions:

           IGNORE means that the recognised characters will be treated as
                  white space and ignored. Scanning will continue until
                  the next non-ignored token is recognised before returning.

           TEXT   causes the scanned text itself to be returned as the
                  value of the token.

        3) Any other value, which is returned as the value of the token.

    States
    ------

    At any given time, the scanner is in one of a number of states.
    Associated with each state is a set of possible tokens. When scanning,
    only tokens associated with the current state are recognised.

    There is a default state, whose name is the empty string. Token
    definitions which are not inside any State definition belong to
    the default state.

    The initial state of the scanner is the default state. The state can
    be changed in one of two ways:

       1) Using Begin(state_name) as the action of a token.

       2) Calling the begin(state_name) method of the Scanner.

    To change back to the default state, use '' as the state name.
    """

    machine = None  # Machine -- the DFA built by __init__ (result of DFA.nfa_to_dfa)
    tables = None   # StateTableMachine

    def __init__(self, specifications, debug=None, debug_flags=7, timings=None):
        """
        Build the scanner machine from |specifications|.

        specifications -- list of (pattern, action) tuples and/or State
                          instances (see the class docstring).
        debug          -- optional writable file-like object; NFA/DFA dumps
                          are written to it according to |debug_flags|.
        debug_flags    -- bitmask: bit 1 (DUMP_NFA) dumps the NFA, bit 2
                          (DUMP_DFA) dumps the DFA.
        timings        -- optional writable file-like object; construction
                          timings are written to it.

        Raises Errors.InvalidScanner if |specifications| is not a list,
        Errors.InvalidToken for malformed specification items.
        """
        if not isinstance(specifications, list):
            raise Errors.InvalidScanner("Scanner definition is not a list")
        if timings:
            # Timing support is imported lazily so the common (untimed)
            # path does not pay for it.
            from .Timing import time

            total_time = 0.0
            time1 = time()
        nfa = Machines.Machine()
        # The default scanner state is named with the empty string.
        default_initial_state = nfa.new_initial_state('')
        # Each token gets a unique, increasing number; it is negated below
        # to serve as the action priority (earlier tokens win ties).
        token_number = 1
        for spec in specifications:
            if isinstance(spec, State):
                # Tokens of a user-defined state hang off their own
                # initial state in the NFA.
                user_initial_state = nfa.new_initial_state(spec.name)
                for token in spec.tokens:
                    self.add_token_to_machine(
                        nfa, user_initial_state, token, token_number)
                    token_number += 1
            elif isinstance(spec, tuple):
                self.add_token_to_machine(
                    nfa, default_initial_state, spec, token_number)
                token_number += 1
            else:
                raise Errors.InvalidToken(
                    token_number,
                    "Expected a token definition (tuple) or State instance")
        if timings:
            time2 = time()
            total_time = total_time + (time2 - time1)
            time3 = time()
        if debug and (debug_flags & 1):
            debug.write("\n============= NFA ===========\n")
            nfa.dump(debug)
        # The debug file is passed through to the subset construction only
        # when both dump bits are set.
        dfa = DFA.nfa_to_dfa(nfa, debug=(debug_flags & 3) == 3 and debug)
        if timings:
            time4 = time()
            total_time = total_time + (time4 - time3)
        if debug and (debug_flags & 2):
            debug.write("\n============= DFA ===========\n")
            dfa.dump(debug)
        if timings:
            timings.write("Constructing NFA : %5.2f\n" % (time2 - time1))
            timings.write("Converting to DFA: %5.2f\n" % (time4 - time3))
            timings.write("TOTAL            : %5.2f\n" % total_time)
        self.machine = dfa

    def add_token_to_machine(self, machine, initial_state, token_spec, token_number):
        """
        Compile one token definition into |machine|, attached to
        |initial_state|.  Any PlexError raised while doing so is re-raised
        as the same exception class with the token number prepended to the
        message, so the user can locate the offending specification item.
        """
        try:
            (re, action_spec) = self.parse_token_definition(token_spec)
            # Disabled this -- matching empty strings can be useful
            #if re.nullable:
            #  raise Errors.InvalidToken(
            #    token_number, "Pattern can match 0 input symbols")
            if isinstance(action_spec, Actions.Action):
                action = action_spec
            else:
                # Duck-type the action: anything with __call__ becomes a
                # Call action, any other value a Return action.
                try:
                    action_spec.__call__
                except AttributeError:
                    action = Actions.Return(action_spec)
                else:
                    action = Actions.Call(action_spec)
            final_state = machine.new_state()
            re.build_machine(machine, initial_state, final_state,
                             match_bol=1, nocase=0)
            # Negative priority: a token defined earlier beats a later one
            # when both match the same text.
            final_state.set_action(action, priority=-token_number)
        except Errors.PlexError as e:
            raise e.__class__("Token number %d: %s" % (token_number, e))

    def parse_token_definition(self, token_spec):
        """
        Validate a token definition and return it as a (pattern, action)
        pair.  Raises Errors.InvalidToken if it is not a 2-tuple whose
        first element is a Regexps.RE instance.
        """
        if not isinstance(token_spec, tuple):
            raise Errors.InvalidToken("Token definition is not a tuple")
        if len(token_spec) != 2:
            raise Errors.InvalidToken("Wrong number of items in token definition")
        pattern, action = token_spec
        if not isinstance(pattern, Regexps.RE):
            raise Errors.InvalidToken("Pattern is not an RE instance")
        return (pattern, action)

    def get_initial_state(self, name):
        # Delegate to the compiled machine; |name| is a scanner state name
        # ('' for the default state).
        return self.machine.get_initial_state(name)
198
199
200
201