1 #include <stddef.h>
2 #include <limits>
3 #include <stack>
4 #include <vector>
5
6 #include "src/dfa/dfa.h"
7
8
9 /*
10 * note [finding strongly connected components of DFA]
11 *
12 * A slight modification of Tarjan's algorithm.
13 *
14 * The algorithm traverses the DFA in depth-first order. It maintains a stack
15 * of states that have already been visited but haven't been assigned to an SCC
16 * yet. For each state the algorithm calculates 'lowlink': index of the highest
17 * ancestor state reachable in one step from a descendant of this state.
18 * Lowlink is used to determine when a set of states should be popped off stack
19 * into a new SCC.
20 *
21 * We use lowlink to hold different kinds of information:
22 * - values in range [0 .. stack size] mean that the state is on stack (a
23 * link to a state with the smallest index reachable from this one)
24 * - SCC_UND means that this state has not been visited yet
25 * - SCC_INF means that this state has already been popped off stack
26 *
27 * We use stack size (rather than topological sort index) as a unique index of
28 * the state on stack. This is safe because the indices of states on stack are
29 * unique and less than the indices of states that have been popped off stack
30 * (SCC_INF).
31 */
32
33 namespace re2c {
34 namespace {
35
36 static const size_t SCC_INF = std::numeric_limits<size_t>::max();
37 static const size_t SCC_UND = SCC_INF - 1;
38
loopback(size_t state,size_t narcs,const size_t * arcs)39 static bool loopback(size_t state, size_t narcs, const size_t *arcs)
40 {
41 for (size_t i = 0; i < narcs; ++i) {
42 if (arcs[i] == state) return true;
43 }
44 return false;
45 }
46
47 struct StackItem {
48 size_t state; // current state
49 size_t symbol; // next arc to be visited in this state
50 size_t link; // Tarjan's "lowlink"
51 };
52
53 // Tarjan's algorithm
scc(const dfa_t & dfa,std::vector<bool> & trivial,std::vector<StackItem> & stack_dfs)54 static void scc(const dfa_t &dfa, std::vector<bool> &trivial,
55 std::vector<StackItem> &stack_dfs)
56 {
57 std::vector<size_t> lowlink(dfa.states.size(), SCC_UND);
58 std::stack<size_t> stack;
59
60 StackItem x0 = {0, 0, 0};
61 stack_dfs.push_back(x0);
62
63 while (!stack_dfs.empty()) {
64 const size_t i = stack_dfs.back().state;
65 size_t c = stack_dfs.back().symbol;
66 size_t link = stack_dfs.back().link;
67 stack_dfs.pop_back();
68
69 const size_t *arcs = dfa.states[i]->arcs;
70
71 if (c == 0) {
72 // DFS recursive enter
73 DASSERT(lowlink[i] == SCC_UND);
74 link = lowlink[i] = stack.size();
75 stack.push(i);
76 }
77 else {
78 // DFS recursive return (from one of successor states)
79 const size_t j = arcs[c - 1];
80 DASSERT(lowlink[j] != SCC_UND);
81 lowlink[i] = std::min(lowlink[i], lowlink[j]);
82 }
83
84 // find the next successor state that hasn't been visited yet
85 for (; c < dfa.nchars; ++c) {
86 const size_t j = arcs[c];
87 if (j != dfa_t::NIL) {
88 if (lowlink[j] == SCC_UND) {
89 break;
90 }
91 lowlink[i] = std::min(lowlink[i], lowlink[j]);
92 }
93 }
94
95 if (c < dfa.nchars) {
96 // recurse into the next successor state
97 StackItem x1 = {i, c + 1, link};
98 stack_dfs.push_back(x1);
99 StackItem x2 = {arcs[c], 0, SCC_UND};
100 stack_dfs.push_back(x2);
101 }
102 else if (lowlink[i] == link) {
103 // all successors have been visited
104 // SCC is non-trivial (has loops) if either:
105 // - it contains multiple interconnected states
106 // - it contains a single self-looping state
107 trivial[i] = i == stack.top() && !loopback(i, dfa.nchars, arcs);
108
109 for (;;) {
110 const size_t j = stack.top();
111 stack.pop();
112 lowlink[j] = SCC_INF;
113 if (i == j) break;
114 }
115 }
116 }
117 }
118
calc_fill(const dfa_t & dfa,const std::vector<bool> & trivial,std::vector<StackItem> & stack_dfs,std::vector<size_t> & fill)119 static void calc_fill(const dfa_t &dfa, const std::vector<bool> &trivial,
120 std::vector<StackItem> &stack_dfs, std::vector<size_t> &fill)
121 {
122 const size_t nstates = dfa.states.size();
123 fill.resize(nstates, SCC_UND);
124
125 StackItem x0 = {0, 0, SCC_INF};
126 stack_dfs.push_back(x0);
127
128 while (!stack_dfs.empty()) {
129 const size_t i = stack_dfs.back().state;
130 size_t c = stack_dfs.back().symbol;
131 stack_dfs.pop_back();
132
133 const size_t *arcs = dfa.states[i]->arcs;
134
135 if (c == 0) {
136 // DFS recursive enter
137 if (fill[i] != SCC_UND) continue;
138 fill[i] = 0;
139 }
140 else {
141 // DFS recursive return (from one of successor states)
142 const size_t j = arcs[c - 1];
143 DASSERT(fill[i] != SCC_UND && fill[j] != SCC_UND);
144 fill[i] = std::max(fill[i], 1 + (trivial[j] ? fill[j] : 0));
145 }
146
147 // find the next successor state that hasn't been visited yet
148 for (; c < dfa.nchars; ++c) {
149 const size_t j = arcs[c];
150 if (j != dfa_t::NIL) break;
151 }
152
153 if (c < dfa.nchars) {
154 // recurse into the next successor state
155 StackItem x1 = {i, c + 1, SCC_INF};
156 stack_dfs.push_back(x1);
157 StackItem x2 = {arcs[c], 0, SCC_INF};
158 stack_dfs.push_back(x2);
159 }
160 }
161
162 // The following states must trigger YYFILL:
163 // - inital state
164 // - all states in non-trivial SCCs
165 // for other states, reset YYFILL argument to zero
166 for (size_t i = 1; i < nstates; ++i) {
167 if (trivial[i]) {
168 fill[i] = 0;
169 }
170 }
171 }
172
173 } // anonymous namespace
174
fillpoints(const dfa_t & dfa,std::vector<size_t> & fill)175 void fillpoints(const dfa_t &dfa, std::vector<size_t> &fill)
176 {
177 const size_t nstates = dfa.states.size();
178 std::vector<bool> trivial(nstates, false);
179 std::vector<StackItem> stack_dfs;
180 stack_dfs.reserve(nstates);
181
182 // find DFA states that belong to non-trivial SCC
183 scc(dfa, trivial, stack_dfs);
184
185 // for each DFA state, calculate YYFILL argument:
186 // maximal path length to the next YYFILL state
187 calc_fill(dfa, trivial, stack_dfs, fill);
188 }
189
190 } // namespace re2c
191