1//===- Automaton.td ----------------------------------------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the key top-level classes needed to produce a reasonably
10// generic finite-state automaton.
11//
12//===----------------------------------------------------------------------===//
13
14// Define a record inheriting from GenericAutomaton to generate a reasonably
15// generic finite-state automaton over a set of actions and states.
16//
17// This automaton is defined by:
18//   1) a state space (explicit, always bits<32>).
19//   2) a set of input symbols (actions, explicit) and
20//   3) a transition function from state + action -> state.
21//
22// A theoretical automaton is defined by <Q, S, d, q0, F>:
23//   Q: A set of possible states.
24//   S: (sigma) The input alphabet.
25//   d: (delta) The transition function f(q in Q, s in S) -> q' in Q.
26//   F: The set of final (accepting) states.
27//
28// Because generating all possible states is tedious, we instead define the
29// transition function only and crawl all reachable states starting from the
30// initial state with all inputs under all transitions until termination.
31//
32// We define F = S, that is, all valid states are accepting.
33//
34// To ensure the generation of the automaton terminates, the state transitions
35// are defined as a lattice (meaning every transitioned-to state is more
36// specific than the transitioned-from state, for some definition of specificity).
37// Concretely a transition may set one or more bits in the state that were
38// previously zero to one. If any bit was not zero, the transition is invalid.
39//
40// Instead of defining all possible states (which would be cumbersome), the user
41// provides a set of possible Transitions from state A, consuming an input
42// symbol A to state B. The Transition object transforms state A to state B and
43// acts as a predicate. This means the state space can be discovered by crawling
44// all the possible transitions until none are valid.
45//
46// This automaton is considered to be nondeterministic, meaning that multiple
47// transitions can occur from any (state, action) pair. The generated automaton
48// is determinized, meaning that is executes in O(k) time where k is the input
49// sequence length.
50//
51// In addition to a generated automaton that determines if a sequence of inputs
52// is accepted or not, a table is emitted that allows determining a plausible
53// sequence of states traversed to accept that input.
54class GenericAutomaton {
55  // Name of a class that inherits from Transition. All records inheriting from
56  // this class will be considered when constructing the automaton.
57  string TransitionClass;
58
59  // Names of fields within TransitionClass that define the action symbol. This
60  // defines the action as an N-tuple.
61  //
62  // Each symbol field can be of class, int, string or code type.
63  //   If the type of a field is a class, the Record's name is used verbatim
64  //     in C++ and the class name is used as the C++ type name.
65  //   If the type of a field is a string, code or int, that is also used
66  //     verbatim in C++.
67  //
68  // To override the C++ type name for field F, define a field called TypeOf_F.
69  // This should be a string that will be used verbatim in C++.
70  //
71  // As an example, to define a 2-tuple with an enum and a string, one might:
72  //   def MyTransition : Transition {
73  //     MyEnum S1;
74  //     int S2;
75  //   }
76  //   def MyAutomaton : GenericAutomaton }{
77  //     let TransitionClass = "Transition";
78  //     let SymbolFields = ["S1", "S2"];
79  //     let TypeOf_S1 = "MyEnumInCxxKind";
80  //   }
81  list<string> SymbolFields;
82}
83
84// All transitions inherit from Transition.
85class Transition {
86  // A transition S' = T(S) is valid if, for every set bit in NewState, the
87  // corresponding bit in S is clear. That is:
88  //   def T(S):
89  //     S' = S | NewState
90  //     return S' if S' != S else Failure
91  //
92  // The automaton generator uses this property to crawl the set of possible
93  // transitions from a starting state of 0b0.
94  bits<32> NewState;
95}
96