1 #ifndef _RE2C_DFA_CLOSURE_POSIX_
2 #define _RE2C_DFA_CLOSURE_POSIX_
3
4 #include <queue>
5
6 #include "src/dfa/determinization.h"
7 #include "src/dfa/posix_precedence.h"
8 #include "src/nfa/nfa.h"
9
10
11 namespace re2c {
12
13 /*
14 * States of in-degree less than 2 are not joint points;
15 * the fact that we are re-scanning this state means that we found
16 * a better path to some previous state. Due to the right distributivity
17 * of path comparison over path concatenation (X < Y => XZ < YZ) we
18 * can just propagate the new path up to the next join point.
19 */
20
21 template<typename ctx_t> void closure_cleanup(nfa_state_t *q);
22 template<typename ctx_t> static void closure_posix_gor1(ctx_t &);
23 template<typename ctx_t> static void closure_posix_gtop(ctx_t &);
24
25 // we *do* want these to be inlined
26 template<typename ctx_t> static inline void init_gor1(ctx_t &ctx);
27 template<typename ctx_t> static inline bool scan(ctx_t &ctx, nfa_state_t *q, bool all);
28 template<typename ctx_t> static inline bool relax_gor1(ctx_t &, const typename ctx_t::conf_t &);
29 template<typename ctx_t> static inline void relax_gtop(ctx_t &, const typename ctx_t::conf_t &);
30
closure_posix(pdetctx_t & ctx)31 inline void closure_posix(pdetctx_t &ctx)
32 {
33 DRESET_CLSTATS(ctx);
34
35 ctx.history.detach();
36
37 switch (ctx.dc_opts->posix_closure) {
38 case POSIX_CLOSURE_GOR1: closure_posix_gor1(ctx); break;
39 case POSIX_CLOSURE_GTOP: closure_posix_gtop(ctx); break;
40 }
41
42 DDUMP_CLSTATS(ctx);
43 }
44
45 template<typename ctx_t>
46 struct cmp_gor1_t
47 {
48 ctx_t &ctx;
49
cmp_gor1_tcmp_gor1_t50 cmp_gor1_t(ctx_t &ctx) : ctx(ctx) {}
51
operatorcmp_gor1_t52 bool operator()
53 ( const typename ctx_t::conf_t &x
54 , const typename ctx_t::conf_t &y) const
55 {
56 // if longest components differ, leftmost already incorporates that
57 const uint32_t xo = x.origin, yo = y.origin;
58 return xo != yo
59 && unpack_leftmost(ctx.oldprectbl[xo * ctx.oldprecdim + yo]) < 0;
60 }
61 };
62
63 /*
64 * note [GOR1 SSSP algorithm]
65 * Cherkassky-Goldberg-Radzik Single Source Shortest Path algorithm.
66 *
67 * Papers:
68 * - "A heuristic improvement of the Bellman-Ford algorithm"
69 * by Goldberg, Radzik (1993)
70 * - "Shortest paths algorithms: Theory and experimental evaluation"
71 * by Cherkassky, Goldberg, Radzik (1996)
72 *
73 * Complexity for digraph G = (V, E) is O(|V| * |E|), and O(|V| + |E|)
74 * in case of acyclic graph.
75 */
76 template<typename ctx_t>
closure_posix_gor1(ctx_t & ctx)77 void closure_posix_gor1(ctx_t &ctx)
78 {
79 std::vector<nfa_state_t*>
80 &topsort = ctx.gor1_topsort,
81 &linear = ctx.gor1_linear;
82
83 init_gor1(ctx);
84
85 for (; !topsort.empty(); ) {
86
87 // 1st pass: depth-first postorder traversal of admissible subgraph
88 for (; !topsort.empty(); ) {
89 nfa_state_t *q = topsort.back();
90 if (q->status == GOR_LINEAR) {
91 topsort.pop_back();
92 }
93 else {
94 q->status = GOR_TOPSORT;
95 DINCCOUNT_CLSCANS(ctx);
96 if (!scan(ctx, q, false)) {
97 q->status = GOR_LINEAR;
98 topsort.pop_back();
99 linear.push_back(q);
100 }
101 }
102 }
103
104 // 2nd pass: linear scan of topologically ordered states
105 for (; !linear.empty(); ) {
106 nfa_state_t *q = linear.back();
107 linear.pop_back();
108 if (q->active) {
109 q->active = 0;
110 q->arcidx = 0;
111 DINCCOUNT_CLSCANS(ctx);
112 scan(ctx, q, true);
113 }
114 q->status = GOR_NOPASS;
115 }
116 }
117 }
118
119 template<typename ctx_t>
init_gor1(ctx_t & ctx)120 void init_gor1(ctx_t &ctx)
121 {
122 typename ctx_t::confset_t &state = ctx.state, &reach = ctx.reach;
123 std::vector<nfa_state_t*> &topsort = ctx.gor1_topsort;
124
125 // init: push configurations ordered by POSIX precedence (highest on top)
126 state.clear();
127 std::sort(reach.begin(), reach.end(), cmp_gor1_t<ctx_t>(ctx));
128 typename ctx_t::rcconfiter_t c = reach.rbegin(), e = reach.rend();
129 for (; c != e; ++c) {
130 nfa_state_t *q = c->state;
131 if (q->clos == NOCLOS) {
132 q->clos = static_cast<uint32_t>(state.size());
133 state.push_back(*c);
134 }
135 else {
136 state[q->clos] = *c;
137 }
138 topsort.push_back(q);
139 }
140 }
141
142 template<typename ctx_t>
scan(ctx_t & ctx,nfa_state_t * q,bool all)143 bool scan(ctx_t &ctx, nfa_state_t *q, bool all)
144 {
145 bool any = false;
146
147 typedef typename ctx_t::conf_t conf_t;
148 const conf_t x = ctx.state[q->clos];
149
150 switch (q->type) {
151 case nfa_state_t::ALT:
152 if (q->arcidx == 0) {
153 any |= relax_gor1(ctx, conf_t(x, q->alt.out1));
154 ++q->arcidx;
155 }
156 if (q->arcidx == 1 && (!any || all)) {
157 any |= relax_gor1(ctx, conf_t(x, q->alt.out2));
158 ++q->arcidx;
159 }
160 break;
161 case nfa_state_t::TAG:
162 if (q->arcidx == 0) {
163 any |= relax_gor1(ctx, conf_t(x, q->tag.out, ctx.history.link(ctx, x)));
164 ++q->arcidx;
165 }
166 break;
167 case nfa_state_t::RAN:
168 case nfa_state_t::FIN:
169 break;
170 }
171
172 return any;
173 }
174
175 template<typename ctx_t>
relax_gor1(ctx_t & ctx,const typename ctx_t::conf_t & x)176 bool relax_gor1(ctx_t &ctx, const typename ctx_t::conf_t &x)
177 {
178 typename ctx_t::confset_t &state = ctx.state;
179 nfa_state_t *q = x.state;
180 const uint32_t idx = q->clos;
181 int32_t p1, p2;
182
183 if (q->status == GOR_TOPSORT) {
184 return false;
185 }
186
187 if (idx == NOCLOS) {
188 q->clos = static_cast<uint32_t>(state.size());
189 state.push_back(x);
190 }
191 else if (q->indeg < 2
192 || ctx_t::history_t::precedence(ctx, x, state[idx], p1, p2) < 0) {
193 state[idx] = x;
194 }
195 else {
196 return false;
197 }
198
199 if (q->status == GOR_NOPASS) {
200 ctx.gor1_topsort.push_back(q);
201 q->arcidx = 0;
202 return true;
203 }
204 else {
205 q->active = 1;
206 return false;
207 }
208 }
209
210 /*
211 * note [GTOP SSSP algorithm]
212 * Global Topsort Single Source Shortest Path algorithm.
213 *
214 * It is well known that SSSP can be solved in linear time on DAGs (directed
215 * acyclic graphs) by exploring graph nodes in topological order. In our case
216 * TNFA is not a DAG (it may have cycles), but it is possible to compute fake
217 * topologcal order by ignoring back edges.
218 *
219 * The algorithm works by having a priority queue of nodes, where priorities
220 * are indices of nodes in fake topological ordering. At each step, the node
221 * with the minimal priority is popped from queue and explored. All nodes
222 * reachable from it on admissible arcs are enqueued, unless they are already
223 * on queue.
224 *
225 * The resulting algorithm is of course not optimal: it can get stuck on
226 * graphs with loops, because it will give priority to some of the loop nodes
227 * compared to others for no good reason.
228 *
229 * However the algorithm is simple and optimal for DAGs, therefore we keep it.
230 */
231
232 template<typename ctx_t>
closure_posix_gtop(ctx_t & ctx)233 void closure_posix_gtop(ctx_t &ctx)
234 {
235 const typename ctx_t::confset_t &reach = ctx.reach;
236 typename ctx_t::confset_t &state = ctx.state;
237 gtop_heap_t &heap = ctx.gtop_heap;
238
239 state.clear();
240 for (typename ctx_t::cconfiter_t c = reach.begin(); c != reach.end(); ++c) {
241 relax_gtop(ctx, *c);
242 }
243
244 for (; !heap.empty(); ) {
245 nfa_state_t *q = heap.top();
246 heap.pop();
247 q->active = 0;
248 DINCCOUNT_CLSCANS(ctx);
249
250 typedef typename ctx_t::conf_t conf_t;
251 const conf_t x = ctx.state[q->clos];
252
253 switch (q->type) {
254 case nfa_state_t::ALT:
255 relax_gtop(ctx, conf_t(x, q->alt.out1));
256 relax_gtop(ctx, conf_t(x, q->alt.out2));
257 break;
258 case nfa_state_t::TAG:
259 relax_gtop(ctx, conf_t(x, q->tag.out, ctx.history.link(ctx, x)));
260 break;
261 case nfa_state_t::RAN:
262 case nfa_state_t::FIN:
263 break;
264 }
265 }
266 }
267
268 template<typename ctx_t>
relax_gtop(ctx_t & ctx,const typename ctx_t::conf_t & c)269 void relax_gtop(ctx_t &ctx, const typename ctx_t::conf_t &c)
270 {
271 typename ctx_t::confset_t &state = ctx.state;
272 nfa_state_t *q = c.state;
273 const uint32_t idx = q->clos;
274 int32_t p1, p2;
275
276 if (idx == NOCLOS) {
277 q->clos = static_cast<uint32_t>(state.size());
278 state.push_back(c);
279 }
280 else if (q->indeg < 2
281 || ctx_t::history_t::precedence(ctx, c, state[idx], p1, p2) < 0) {
282 state[idx] = c;
283 }
284 else {
285 return;
286 }
287
288 if (!q->active) {
289 q->active = 1;
290 ctx.gtop_heap.push(q);
291 }
292 }
293
294 template<>
295 inline void closure_cleanup<pdetctx_t>(nfa_state_t *q)
296 {
297 q->clos = NOCLOS;
298 q->arcidx = 0;
299 DASSERT(q->status == GOR_NOPASS && q->active == 0);
300 }
301
302 } // namespace re2c
303
304 #endif // _RE2C_DFA_CLOSURE_POSIX_
305
306