1 #ifndef _RE2C_DFA_CLOSURE_POSIX_
2 #define _RE2C_DFA_CLOSURE_POSIX_
3 
4 #include <queue>
5 
6 #include "src/dfa/determinization.h"
7 #include "src/dfa/posix_precedence.h"
8 #include "src/nfa/nfa.h"
9 
10 
11 namespace re2c {
12 
13 /*
14  * States of in-degree less than 2 are not joint points;
15  * the fact that we are re-scanning this state means that we found
16  * a better path to some previous state. Due to the right distributivity
17  * of path comparison over path concatenation (X < Y => XZ < YZ) we
18  * can just propagate the new path up to the next join point.
19  */
20 
21 template<typename ctx_t> void closure_cleanup(nfa_state_t *q);
22 template<typename ctx_t> static void closure_posix_gor1(ctx_t &);
23 template<typename ctx_t> static void closure_posix_gtop(ctx_t &);
24 
25 // we *do* want these to be inlined
26 template<typename ctx_t> static inline void init_gor1(ctx_t &ctx);
27 template<typename ctx_t> static inline bool scan(ctx_t &ctx, nfa_state_t *q, bool all);
28 template<typename ctx_t> static inline bool relax_gor1(ctx_t &, const typename ctx_t::conf_t &);
29 template<typename ctx_t> static inline void relax_gtop(ctx_t &, const typename ctx_t::conf_t &);
30 
closure_posix(pdetctx_t & ctx)31 inline void closure_posix(pdetctx_t &ctx)
32 {
33     DRESET_CLSTATS(ctx);
34 
35     ctx.history.detach();
36 
37     switch (ctx.dc_opts->posix_closure) {
38         case POSIX_CLOSURE_GOR1: closure_posix_gor1(ctx); break;
39         case POSIX_CLOSURE_GTOP: closure_posix_gtop(ctx); break;
40     }
41 
42     DDUMP_CLSTATS(ctx);
43 }
44 
45 template<typename ctx_t>
46 struct cmp_gor1_t
47 {
48     ctx_t &ctx;
49 
cmp_gor1_tcmp_gor1_t50     cmp_gor1_t(ctx_t &ctx) : ctx(ctx) {}
51 
operatorcmp_gor1_t52     bool operator()
53         ( const typename ctx_t::conf_t &x
54         , const typename ctx_t::conf_t &y) const
55     {
56         // if longest components differ, leftmost already incorporates that
57         const uint32_t xo = x.origin, yo = y.origin;
58         return xo != yo
59             && unpack_leftmost(ctx.oldprectbl[xo * ctx.oldprecdim + yo]) < 0;
60     }
61 };
62 
63 /*
64  * note [GOR1 SSSP algorithm]
65  * Cherkassky-Goldberg-Radzik Single Source Shortest Path algorithm.
66  *
67  * Papers:
68  * - "A heuristic improvement of the Bellman-Ford algorithm"
69  *   by Goldberg, Radzik (1993)
70  * - "Shortest paths algorithms: Theory and experimental evaluation"
71  *   by Cherkassky, Goldberg, Radzik (1996)
72  *
73  * Complexity for digraph G = (V, E) is O(|V| * |E|), and O(|V| + |E|)
74  * in case of acyclic graph.
75  */
76 template<typename ctx_t>
closure_posix_gor1(ctx_t & ctx)77 void closure_posix_gor1(ctx_t &ctx)
78 {
79     std::vector<nfa_state_t*>
80         &topsort = ctx.gor1_topsort,
81         &linear = ctx.gor1_linear;
82 
83     init_gor1(ctx);
84 
85     for (; !topsort.empty(); ) {
86 
87         // 1st pass: depth-first postorder traversal of admissible subgraph
88         for (; !topsort.empty(); ) {
89             nfa_state_t *q = topsort.back();
90             if (q->status == GOR_LINEAR) {
91                 topsort.pop_back();
92             }
93             else {
94                 q->status = GOR_TOPSORT;
95                 DINCCOUNT_CLSCANS(ctx);
96                 if (!scan(ctx, q, false)) {
97                     q->status = GOR_LINEAR;
98                     topsort.pop_back();
99                     linear.push_back(q);
100                 }
101             }
102         }
103 
104         // 2nd pass: linear scan of topologically ordered states
105         for (; !linear.empty(); ) {
106             nfa_state_t *q = linear.back();
107             linear.pop_back();
108             if (q->active) {
109                 q->active = 0;
110                 q->arcidx = 0;
111                 DINCCOUNT_CLSCANS(ctx);
112                 scan(ctx, q, true);
113             }
114             q->status = GOR_NOPASS;
115         }
116     }
117 }
118 
119 template<typename ctx_t>
init_gor1(ctx_t & ctx)120 void init_gor1(ctx_t &ctx)
121 {
122     typename ctx_t::confset_t &state = ctx.state, &reach = ctx.reach;
123     std::vector<nfa_state_t*> &topsort = ctx.gor1_topsort;
124 
125     // init: push configurations ordered by POSIX precedence (highest on top)
126     state.clear();
127     std::sort(reach.begin(), reach.end(), cmp_gor1_t<ctx_t>(ctx));
128     typename ctx_t::rcconfiter_t c = reach.rbegin(), e = reach.rend();
129     for (; c != e; ++c) {
130         nfa_state_t *q = c->state;
131         if (q->clos == NOCLOS) {
132             q->clos = static_cast<uint32_t>(state.size());
133             state.push_back(*c);
134         }
135         else {
136             state[q->clos] = *c;
137         }
138         topsort.push_back(q);
139     }
140 }
141 
142 template<typename ctx_t>
scan(ctx_t & ctx,nfa_state_t * q,bool all)143 bool scan(ctx_t &ctx, nfa_state_t *q, bool all)
144 {
145     bool any = false;
146 
147     typedef typename ctx_t::conf_t conf_t;
148     const conf_t x = ctx.state[q->clos];
149 
150     switch (q->type) {
151         case nfa_state_t::ALT:
152             if (q->arcidx == 0) {
153                 any |= relax_gor1(ctx, conf_t(x, q->alt.out1));
154                 ++q->arcidx;
155             }
156             if (q->arcidx == 1 && (!any || all)) {
157                 any |= relax_gor1(ctx, conf_t(x, q->alt.out2));
158                 ++q->arcidx;
159             }
160             break;
161         case nfa_state_t::TAG:
162             if (q->arcidx == 0) {
163                 any |= relax_gor1(ctx, conf_t(x, q->tag.out, ctx.history.link(ctx, x)));
164                 ++q->arcidx;
165             }
166             break;
167         case nfa_state_t::RAN:
168         case nfa_state_t::FIN:
169             break;
170     }
171 
172     return any;
173 }
174 
175 template<typename ctx_t>
relax_gor1(ctx_t & ctx,const typename ctx_t::conf_t & x)176 bool relax_gor1(ctx_t &ctx, const typename ctx_t::conf_t &x)
177 {
178     typename ctx_t::confset_t &state = ctx.state;
179     nfa_state_t *q = x.state;
180     const uint32_t idx = q->clos;
181     int32_t p1, p2;
182 
183     if (q->status == GOR_TOPSORT) {
184         return false;
185     }
186 
187     if (idx == NOCLOS) {
188         q->clos = static_cast<uint32_t>(state.size());
189         state.push_back(x);
190     }
191     else if (q->indeg < 2
192         || ctx_t::history_t::precedence(ctx, x, state[idx], p1, p2) < 0) {
193         state[idx] = x;
194     }
195     else {
196         return false;
197     }
198 
199     if (q->status == GOR_NOPASS) {
200         ctx.gor1_topsort.push_back(q);
201         q->arcidx = 0;
202         return true;
203     }
204     else {
205         q->active = 1;
206         return false;
207     }
208 }
209 
210 /*
211  * note [GTOP SSSP algorithm]
212  * Global Topsort Single Source Shortest Path algorithm.
213  *
214  * It is well known that SSSP can be solved in linear time on DAGs (directed
215  * acyclic graphs) by exploring graph nodes in topological order. In our case
216  * TNFA is not a DAG (it may have cycles), but it is possible to compute fake
217  * topologcal order by ignoring back edges.
218  *
219  * The algorithm works by having a priority queue of nodes, where priorities
220  * are indices of nodes in fake topological ordering. At each step, the node
221  * with the minimal priority is popped from queue and explored. All nodes
222  * reachable from it on admissible arcs are enqueued, unless they are already
223  * on queue.
224  *
225  * The resulting algorithm is of course not optimal: it can get stuck on
226  * graphs with loops, because it will give priority to some of the loop nodes
227  * compared to others for no good reason.
228  *
229  * However the algorithm is simple and optimal for DAGs, therefore we keep it.
230  */
231 
232 template<typename ctx_t>
closure_posix_gtop(ctx_t & ctx)233 void closure_posix_gtop(ctx_t &ctx)
234 {
235     const typename ctx_t::confset_t &reach = ctx.reach;
236     typename ctx_t::confset_t &state = ctx.state;
237     gtop_heap_t &heap = ctx.gtop_heap;
238 
239     state.clear();
240     for (typename ctx_t::cconfiter_t c = reach.begin(); c != reach.end(); ++c) {
241         relax_gtop(ctx, *c);
242     }
243 
244     for (; !heap.empty(); ) {
245         nfa_state_t *q = heap.top();
246         heap.pop();
247         q->active = 0;
248         DINCCOUNT_CLSCANS(ctx);
249 
250         typedef typename ctx_t::conf_t conf_t;
251         const conf_t x = ctx.state[q->clos];
252 
253         switch (q->type) {
254             case nfa_state_t::ALT:
255                 relax_gtop(ctx, conf_t(x, q->alt.out1));
256                 relax_gtop(ctx, conf_t(x, q->alt.out2));
257                 break;
258             case nfa_state_t::TAG:
259                 relax_gtop(ctx, conf_t(x, q->tag.out, ctx.history.link(ctx, x)));
260                 break;
261             case nfa_state_t::RAN:
262             case nfa_state_t::FIN:
263                 break;
264         }
265     }
266 }
267 
268 template<typename ctx_t>
relax_gtop(ctx_t & ctx,const typename ctx_t::conf_t & c)269 void relax_gtop(ctx_t &ctx, const typename ctx_t::conf_t &c)
270 {
271     typename ctx_t::confset_t &state = ctx.state;
272     nfa_state_t *q = c.state;
273     const uint32_t idx = q->clos;
274     int32_t p1, p2;
275 
276     if (idx == NOCLOS) {
277         q->clos = static_cast<uint32_t>(state.size());
278         state.push_back(c);
279     }
280     else if (q->indeg < 2
281         || ctx_t::history_t::precedence(ctx, c, state[idx], p1, p2) < 0) {
282         state[idx] = c;
283     }
284     else {
285         return;
286     }
287 
288     if (!q->active) {
289         q->active = 1;
290         ctx.gtop_heap.push(q);
291     }
292 }
293 
294 template<>
295 inline void closure_cleanup<pdetctx_t>(nfa_state_t *q)
296 {
297     q->clos = NOCLOS;
298     q->arcidx = 0;
299     DASSERT(q->status == GOR_NOPASS && q->active == 0);
300 }
301 
302 } // namespace re2c
303 
304 #endif // _RE2C_DFA_CLOSURE_POSIX_
305 
306