1 //===--------------------- DfaEmitter.h -----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Defines a generic automaton builder. This takes a set of transitions and
9 // states that represent a nondeterministic finite state automaton (NFA) and
10 // emits a determinized DFA in a form that include/llvm/Support/Automaton.h can
11 // drive.
12 //
13 // See file llvm/TableGen/Automaton.td for the TableGen API definition.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #ifndef LLVM_UTILS_TABLEGEN_DFAEMITTER_H
18 #define LLVM_UTILS_TABLEGEN_DFAEMITTER_H
19 
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/UniqueVector.h"
22 #include <map>
23 #include <set>
24 #include <utility>
25 #include <vector>
26 
27 namespace llvm {
28 
29 class raw_ostream;
30 class StringRef;
31 
32 /// Construct a deterministic finite state automaton from possible
33 /// nondeterministic state and transition data.
34 ///
35 /// The state type is a 64-bit unsigned integer. The generated automaton is
36 /// invariant to the sparsity of the state representation - its size is only
37 /// a function of the cardinality of the set of states.
38 ///
39 /// The inputs to this emitter are considered to define a nondeterministic
40 /// finite state automaton (NFA). This is then converted to a DFA during
41 /// emission. The emitted tables can be used to by
42 /// include/llvm/Support/Automaton.h.
43 class DfaEmitter {
44 public:
45   // The type of an NFA state. The initial state is always zero.
46   using state_type = uint64_t;
47   // The type of an action.
48   using action_type = uint64_t;
49 
50   DfaEmitter() = default;
51   virtual ~DfaEmitter() = default;
52 
53   void addTransition(state_type From, state_type To, action_type A);
54   void emit(StringRef Name, raw_ostream &OS);
55 
56 protected:
57   /// Emit the C++ type of an action to OS.
58   virtual void printActionType(raw_ostream &OS);
59   /// Emit the C++ value of an action A to OS.
60   virtual void printActionValue(action_type A, raw_ostream &OS);
61 
62 private:
63   /// The state type of deterministic states. These are only used internally to
64   /// this class. This is an ID into the DfaStates UniqueVector.
65   using dfa_state_type = unsigned;
66 
67   /// The actual representation of a DFA state, which is a union of one or more
68   /// NFA states.
69   using DfaState = SmallVector<state_type, 4>;
70 
71   /// A DFA transition consists of a set of NFA states transitioning to a
72   /// new set of NFA states. The DfaTransitionInfo tracks, for every
73   /// transitioned-from NFA state, a set of valid transitioned-to states.
74   ///
75   /// Emission of this transition relation allows algorithmic determination of
76   /// the possible candidate NFA paths taken under a given input sequence to
77   /// reach a given DFA state.
78   using DfaTransitionInfo = SmallVector<std::pair<state_type, state_type>, 4>;
79 
80   /// The set of all possible actions.
81   std::set<action_type> Actions;
82 
83   /// The set of nondeterministic transitions. A state-action pair can
84   /// transition to multiple target states.
85   std::map<std::pair<state_type, action_type>, std::vector<state_type>>
86       NfaTransitions;
87   std::set<state_type> NfaStates;
88   unsigned NumNfaTransitions = 0;
89 
90   /// The set of deterministic states. DfaStates.getId(DfaState) returns an ID,
91   /// which is dfa_state_type. Note that because UniqueVector reserves state
92   /// zero, the initial DFA state is always 1.
93   UniqueVector<DfaState> DfaStates;
94   /// The set of deterministic transitions. A state-action pair has only a
95   /// single target state.
96   std::map<std::pair<dfa_state_type, action_type>,
97            std::pair<dfa_state_type, DfaTransitionInfo>>
98       DfaTransitions;
99 
100   /// Visit all NFA states and construct the DFA.
101   void constructDfa();
102   /// Visit a single DFA state and construct all possible transitions to new DFA
103   /// states.
104   void visitDfaState(const DfaState &DS);
105 };
106 
107 } // namespace llvm
108 
109 #endif
110