1 #include <stddef.h>
2 #include <limits>
3 #include <stack>
4 #include <vector>
5 
6 #include "src/dfa/dfa.h"
7 
8 
9 /*
10  * note [finding strongly connected components of DFA]
11  *
12  * A slight modification of Tarjan's algorithm.
13  *
14  * The algorithm traverses the DFA in depth-first order. It maintains a stack
15  * of states that have already been visited but haven't been assigned to an SCC
16  * yet. For each state the algorithm calculates 'lowlink': index of the highest
17  * ancestor state reachable in one step from a descendant of this state.
18  * Lowlink is used to determine when a set of states should be popped off stack
19  * into a new SCC.
20  *
21  * We use lowlink to hold different kinds of information:
22  *   - values in range [0 .. stack size] mean that the state is on stack (a
23  *     link to a state with the smallest index reachable from this one)
24  *   - SCC_UND means that this state has not been visited yet
25  *   - SCC_INF means that this state has already been popped off stack
26  *
27  * We use stack size (rather than topological sort index) as a unique index of
28  * the state on stack. This is safe because the indices of states on stack are
29  * unique and less than the indices of states that have been popped off stack
30  * (SCC_INF).
31  */
32 
33 namespace re2c {
34 namespace {
35 
36 static const size_t SCC_INF = std::numeric_limits<size_t>::max();
37 static const size_t SCC_UND = SCC_INF - 1;
38 
loopback(size_t state,size_t narcs,const size_t * arcs)39 static bool loopback(size_t state, size_t narcs, const size_t *arcs)
40 {
41     for (size_t i = 0; i < narcs; ++i) {
42         if (arcs[i] == state) return true;
43     }
44     return false;
45 }
46 
47 struct StackItem {
48     size_t state;  // current state
49     size_t symbol; // next arc to be visited in this state
50     size_t link;   // Tarjan's "lowlink"
51 };
52 
53 // Tarjan's algorithm
scc(const dfa_t & dfa,std::vector<bool> & trivial,std::vector<StackItem> & stack_dfs)54 static void scc(const dfa_t &dfa, std::vector<bool> &trivial,
55     std::vector<StackItem> &stack_dfs)
56 {
57     std::vector<size_t> lowlink(dfa.states.size(), SCC_UND);
58     std::stack<size_t> stack;
59 
60     StackItem x0 = {0, 0, 0};
61     stack_dfs.push_back(x0);
62 
63     while (!stack_dfs.empty()) {
64         const size_t i = stack_dfs.back().state;
65         size_t c = stack_dfs.back().symbol;
66         size_t link = stack_dfs.back().link;
67         stack_dfs.pop_back();
68 
69         const size_t *arcs = dfa.states[i]->arcs;
70 
71         if (c == 0) {
72             // DFS recursive enter
73             DASSERT(lowlink[i] == SCC_UND);
74             link = lowlink[i] = stack.size();
75             stack.push(i);
76         }
77         else {
78             // DFS recursive return (from one of successor states)
79             const size_t j = arcs[c - 1];
80             DASSERT(lowlink[j] != SCC_UND);
81             lowlink[i] = std::min(lowlink[i], lowlink[j]);
82         }
83 
84         // find the next successor state that hasn't been visited yet
85         for (; c < dfa.nchars; ++c) {
86             const size_t j = arcs[c];
87             if (j != dfa_t::NIL) {
88                 if (lowlink[j] == SCC_UND) {
89                     break;
90                 }
91                 lowlink[i] = std::min(lowlink[i], lowlink[j]);
92             }
93         }
94 
95         if (c < dfa.nchars) {
96             // recurse into the next successor state
97             StackItem x1 = {i, c + 1, link};
98             stack_dfs.push_back(x1);
99             StackItem x2 = {arcs[c], 0, SCC_UND};
100             stack_dfs.push_back(x2);
101         }
102         else if (lowlink[i] == link) {
103             // all successors have been visited
104             // SCC is non-trivial (has loops) if either:
105             //   - it contains multiple interconnected states
106             //   - it contains a single self-looping state
107             trivial[i] = i == stack.top() && !loopback(i, dfa.nchars, arcs);
108 
109             for (;;) {
110                 const size_t j = stack.top();
111                 stack.pop();
112                 lowlink[j] = SCC_INF;
113                 if (i == j) break;
114             }
115         }
116     }
117 }
118 
calc_fill(const dfa_t & dfa,const std::vector<bool> & trivial,std::vector<StackItem> & stack_dfs,std::vector<size_t> & fill)119 static void calc_fill(const dfa_t &dfa, const std::vector<bool> &trivial,
120     std::vector<StackItem> &stack_dfs, std::vector<size_t> &fill)
121 {
122     const size_t nstates = dfa.states.size();
123     fill.resize(nstates, SCC_UND);
124 
125     StackItem x0 = {0, 0, SCC_INF};
126     stack_dfs.push_back(x0);
127 
128     while (!stack_dfs.empty()) {
129         const size_t i = stack_dfs.back().state;
130         size_t c = stack_dfs.back().symbol;
131         stack_dfs.pop_back();
132 
133         const size_t *arcs = dfa.states[i]->arcs;
134 
135         if (c == 0) {
136             // DFS recursive enter
137             if (fill[i] != SCC_UND) continue;
138             fill[i] = 0;
139         }
140         else {
141             // DFS recursive return (from one of successor states)
142             const size_t j = arcs[c - 1];
143             DASSERT(fill[i] != SCC_UND && fill[j] != SCC_UND);
144             fill[i] = std::max(fill[i], 1 + (trivial[j] ? fill[j] : 0));
145         }
146 
147         // find the next successor state that hasn't been visited yet
148         for (; c < dfa.nchars; ++c) {
149             const size_t j = arcs[c];
150             if (j != dfa_t::NIL) break;
151         }
152 
153         if (c < dfa.nchars) {
154             // recurse into the next successor state
155             StackItem x1 = {i, c + 1, SCC_INF};
156             stack_dfs.push_back(x1);
157             StackItem x2 = {arcs[c], 0, SCC_INF};
158             stack_dfs.push_back(x2);
159         }
160     }
161 
162     // The following states must trigger YYFILL:
163     //   - inital state
164     //   - all states in non-trivial SCCs
165     // for other states, reset YYFILL argument to zero
166     for (size_t i = 1; i < nstates; ++i) {
167         if (trivial[i]) {
168             fill[i] = 0;
169         }
170     }
171 }
172 
173 } // anonymous namespace
174 
fillpoints(const dfa_t & dfa,std::vector<size_t> & fill)175 void fillpoints(const dfa_t &dfa, std::vector<size_t> &fill)
176 {
177     const size_t nstates = dfa.states.size();
178     std::vector<bool> trivial(nstates, false);
179     std::vector<StackItem> stack_dfs;
180     stack_dfs.reserve(nstates);
181 
182     // find DFA states that belong to non-trivial SCC
183     scc(dfa, trivial, stack_dfs);
184 
185     // for each DFA state, calculate YYFILL argument:
186     // maximal path length to the next YYFILL state
187     calc_fill(dfa, trivial, stack_dfs, fill);
188 }
189 
190 } // namespace re2c
191