1 #include "src/util/c99_stdint.h"
2 #include <algorithm>
3 #include <memory>
4 #include <set>
5 #include <string>
6 #include <valarray>
7 #include <vector>
8 
9 #include "src/options/opt.h"
10 #include "src/debug/debug.h"
11 #include "src/dfa/dfa.h"
12 #include "src/dfa/determinization.h"
13 #include "src/dfa/tcmd.h"
14 #include "src/msg/warn.h"
15 #include "src/nfa/nfa.h"
16 #include "src/regexp/rule.h"
17 #include "src/regexp/tag.h"
18 #include "src/util/range.h"
19 
20 
21 namespace re2c {
22 
23 template<typename ctx_t> static void determinization(ctx_t &ctx);
24 template<typename ctx_t> static void clear_caches(ctx_t &ctx);
25 template<typename ctx_t> static void warn_nondeterministic_tags(const ctx_t &ctx);
26 
27 const uint32_t dfa_t::NIL = ~0u;
28 
dfa_t(const nfa_t & nfa,size_t def_rule,size_t eof_rule)29 dfa_t::dfa_t(const nfa_t &nfa, size_t def_rule, size_t eof_rule)
30     : states()
31     , nchars(nfa.charset.size() - 1) // (n + 1) bounds for n ranges
32     , charset(nfa.charset)
33     , rules(nfa.rules)
34     , tags(nfa.tags)
35     , mtagvers(*new std::set<tagver_t>)
36     , finvers(NULL)
37     , tcpool(*new tcpool_t)
38     , maxtagver(0)
39     , tcmd0(NULL)
40     , tcid0(TCID0)
41     , def_rule(def_rule)
42     , eof_rule(eof_rule)
43 {}
44 
determinization(const nfa_t & nfa,dfa_t & dfa,const opt_t * opts,Msg & msg,const std::string & cond)45 void determinization(const nfa_t &nfa, dfa_t &dfa, const opt_t *opts, Msg &msg,
46     const std::string &cond)
47 {
48     if (opts->posix_semantics) {
49         pdetctx_t ctx(opts, msg, cond, nfa, dfa);
50         determinization(ctx);
51     }
52     else {
53         ldetctx_t ctx(opts, msg, cond, nfa, dfa);
54         determinization(ctx);
55     }
56 }
57 
~dfa_t()58 dfa_t::~dfa_t()
59 {
60     std::vector<dfa_state_t*>::iterator
61         i = states.begin(),
62         e = states.end();
63     for (; i != e; ++i)
64     {
65         delete *i;
66     }
67 }
68 
69 template<typename ctx_t>
determinization(ctx_t & ctx)70 void determinization(ctx_t &ctx)
71 {
72     const uint32_t INITIAL_TAGS = init_tag_versions(ctx);
73 
74     // initial state
75     const clos_t c0(ctx.nfa.root, 0, INITIAL_TAGS, HROOT, HROOT);
76     ctx.reach.push_back(c0);
77     tagged_epsilon_closure(ctx);
78     find_state(ctx);
79 
80     // iterate while new kernels are added: for each alphabet symbol,
81     // build tagged epsilon-closure of all reachable NFA states,
82     // then find identical or mappable DFA state or add a new one
83     for (uint32_t i = 0; i < ctx.dc_kernels.size(); ++i) {
84         ctx.dc_origin = i;
85         clear_caches(ctx);
86 
87         for (uint32_t c = 0; c < ctx.dfa.nchars; ++c) {
88             reach_on_symbol(ctx, c);
89             tagged_epsilon_closure(ctx);
90             find_state(ctx);
91         }
92     }
93 
94     warn_nondeterministic_tags(ctx);
95 }
96 
97 template<typename ctx_t>
clear_caches(ctx_t & ctx)98 void clear_caches(ctx_t &ctx)
99 {
100     ctx.dc_newvers.clear();
101 
102     const size_t ntags = ctx.nfa.tags.size();
103     for (size_t t = 0; t < ntags; ++t) {
104         ctx.dc_hc_caches[t].clear();
105     }
106 }
107 
108 template<typename ctx_t>
reach_on_symbol(ctx_t & ctx,uint32_t sym)109 void reach_on_symbol(ctx_t &ctx, uint32_t sym)
110 {
111     ctx.dc_symbol = sym;
112     const uint32_t symbol = ctx.dfa.charset[sym];
113 
114     const kernel_t *kernel = ctx.dc_kernels[ctx.dc_origin];
115     ctx.oldprectbl = kernel->prectbl;
116     ctx.oldprecdim = kernel->size;
117 
118     closure_t &reach = ctx.reach;
119     reach.clear();
120 
121     // Add configurations in reverse order: leftmost greedy closure uses
122     // the resulting array as stack, and POSIX closure doesn't care (GOR1
123     // pre-sorts configurations, and GTOP uses priority queue).
124     for (uint32_t i = static_cast<uint32_t>(kernel->size); i --> 0; ) {
125         nfa_state_t *s = transition(kernel->state[i], symbol);
126         if (s) {
127             const uint32_t v = ctx.dc_opts->stadfa ? 0 : kernel->tvers[i];
128             const clos_t c(s, i, v, kernel->thist[i], HROOT);
129             reach.push_back(c);
130         }
131     }
132 }
133 
transition(nfa_state_t * state,uint32_t symbol)134 nfa_state_t *transition(nfa_state_t *state, uint32_t symbol)
135 {
136     if (state->type != nfa_state_t::RAN) {
137         return NULL;
138     }
139     for (const Range *r = state->ran.ran; r; r = r->next()) {
140         if ((r->lower() <= symbol) && (symbol < r->upper())) {
141             return state->ran.out;
142         }
143     }
144     return NULL;
145 }
146 
147 template<typename ctx_t>
init_tag_versions(ctx_t & ctx)148 uint32_t init_tag_versions(ctx_t &ctx)
149 {
150     dfa_t &dfa = ctx.dfa;
151     const size_t ntags = dfa.tags.size();
152 
153     // all-zero tag configuration must have static number zero
154     ctx.dc_tagvertbl.insert_const(TAGVER_ZERO);
155     DASSERT(ZERO_TAGS == ctx.dc_tagvertbl.insert_const(TAGVER_ZERO));
156 
157     // initial tag versions: [1 .. N]
158     const tcid_t INITIAL_TAGS = ctx.dc_tagvertbl.insert_succ(1);
159 
160     // other versions: [ .. -(N + 1)] and [N + 1 .. ]
161     dfa.maxtagver = static_cast<tagver_t>(ntags);
162 
163     // final/fallback versions will be assigned on the go
164     dfa.finvers = new tagver_t[ntags];
165     for (size_t i = 0; i < ntags; ++i) {
166         dfa.finvers[i] = fixed(dfa.tags[i]) ? TAGVER_ZERO : ++dfa.maxtagver;
167     }
168 
169     // mark tags with history (initial and final)
170     for (size_t i = 0; i < ntags; ++i) {
171         if (history(dfa.tags[i])) {
172             tagver_t v = static_cast<tagver_t>(i) + 1, f = dfa.finvers[i];
173             if (f != TAGVER_ZERO) {
174                 dfa.mtagvers.insert(f);
175             }
176             dfa.mtagvers.insert(v);
177         }
178     }
179 
180     return INITIAL_TAGS;
181 }
182 
183 // For each tag, find maximal number of parallel versions of this tag
184 // used in each kernel (degree of non-determinism) and warn about tags with
185 // maximum degree two or more.
186 // WARNING: this function assumes that kernel items are grouped by rule
187 template<typename ctx_t>
warn_nondeterministic_tags(const ctx_t & ctx)188 void warn_nondeterministic_tags(const ctx_t &ctx)
189 {
190     if (ctx.dc_opts->posix_syntax
191         || ctx.dc_opts->stadfa) return;
192 
193     Warn &warn = ctx.dc_msg.warn;
194     const kernels_t &kernels = ctx.dc_kernels;
195     const std::vector<Tag> &tags = ctx.dfa.tags;
196     const std::valarray<Rule> &rules = ctx.dfa.rules;
197 
198     const size_t
199         ntag = tags.size(),
200         nkrn = kernels.size(),
201         nrule = rules.size();
202     std::vector<size_t> maxv(ntag, 0);
203     std::set<tagver_t> uniq;
204 
205     for (uint32_t i = 0; i < nkrn; ++i) {
206         const kernel_t *k = kernels[i];
207         nfa_state_t **s = k->state;
208         const size_t n = k->size;
209         const uint32_t *v = k->tvers;
210 
211         for (size_t u = 0; u < n;) {
212             const size_t r = s[u]->rule;
213             const Rule &rule = rules[r];
214 
215             const size_t l = u;
216             for (; ++u < n && s[u]->rule == r;);
217             for (size_t t = rule.ltag; t < rule.htag; ++t) {
218                 uniq.clear();
219                 for (size_t m = l; m < u; ++m) {
220                     uniq.insert(ctx.dc_tagvertbl[v[m]][t]);
221                 }
222                 maxv[t] = std::max(maxv[t], uniq.size());
223             }
224         }
225     }
226 
227     for (uint32_t r = 0; r < nrule; ++r) {
228         const Rule &rule = rules[r];
229         for (size_t t = rule.ltag; t < rule.htag; ++t) {
230             const size_t m = maxv[t];
231             if (m > 1) {
232                 warn.nondeterministic_tags(rule.semact->loc, ctx.dc_condname,
233                     tags[t].name, m);
234             }
235         }
236     }
237 }
238 
239 template<typename history_t>
determ_context_t(const opt_t * opts,Msg & msg,const std::string & condname,const nfa_t & nfa,dfa_t & dfa)240 determ_context_t<history_t>::determ_context_t(const opt_t *opts, Msg &msg
241     , const std::string &condname, const nfa_t &nfa, dfa_t &dfa)
242     : dc_opts(opts)
243     , dc_msg(msg)
244     , dc_condname(condname)
245     , nfa(nfa)
246     , dfa(dfa)
247     , dc_allocator()
248     , dc_origin(dfa_t::NIL)
249     , dc_target(dfa_t::NIL)
250     , dc_symbol(0)
251     , dc_actions(NULL)
252     , dc_tagvertbl(nfa.tags.size())
253     , history()
254     , dc_kernels()
255     , dc_buffers()
256     , dc_hc_caches()
257     , dc_newvers(newver_cmp_t<history_t>(history, dc_hc_caches))
258     , dc_path1()
259     , dc_path2()
260     , dc_path3()
261     , dc_tagcount()
262     , stadfa_actions(NULL)
263     , stadfa_tagvers()
264     , reach()
265     , state()
266     , gor1_topsort()
267     , gor1_linear()
268     , gtop_buffer()
269     , gtop_cmp()
270     , gtop_heap(gtop_cmp, gtop_buffer)
271     , newprectbl(NULL)
272     , oldprectbl(NULL)
273     , oldprecdim(0)
274     , histlevel(NULL)
275     , sortcores()
276     , fincount()
277     , worklist()
278     , dump_dfa_tree(*this)
279     , dc_dump(opts)
280     , dc_clstats()
281 {
282     const size_t nstates = nfa.size;
283     const size_t ncores = nfa.ncores;
284     const size_t ntags = nfa.tags.size();
285 
286     reach.reserve(nstates);
287     state.reserve(nstates);
288 
289     dc_hc_caches.resize(ntags);
290     dc_path1.reserve(ntags);
291     dc_path2.reserve(ntags);
292     dc_path3.reserve(ntags);
293     dc_tagcount.resize(ntags);
294 
295     if (opts->posix_semantics) {
296         newprectbl = new prectable_t[ncores * ncores];
297         histlevel = new histleaf_t[ncores];
298         sortcores.reserve(ncores);
299         fincount.resize(ncores + 1);
300         worklist.reserve(nstates);
301     }
302 
303     if (opts->posix_closure == POSIX_CLOSURE_GTOP) {
304         gtop_buffer.reserve(nstates);
305     }
306     else {
307         gor1_topsort.reserve(nstates);
308         gor1_linear.reserve(nstates);
309     }
310 }
311 
312 template<typename history_t>
~determ_context_t()313 determ_context_t<history_t>::~determ_context_t()
314 {
315     delete[] newprectbl;
316     delete[] histlevel;
317 }
318 
319 // explicit instantiation for context types
320 template void reach_on_symbol<ldetctx_t>(ldetctx_t &ctx, uint32_t sym);
321 template void reach_on_symbol<pdetctx_t>(pdetctx_t &ctx, uint32_t sym);
322 template uint32_t init_tag_versions<ldetctx_t>(ldetctx_t &ctx);
323 template uint32_t init_tag_versions<pdetctx_t>(pdetctx_t &ctx);
324 template determ_context_t<lhistory_t>::~determ_context_t();
325 template determ_context_t<phistory_t>::~determ_context_t();
326 template determ_context_t<lhistory_t>::determ_context_t(const opt_t*, Msg&,
327     const std::string&, const nfa_t&, dfa_t&);
328 template determ_context_t<phistory_t>::determ_context_t(const opt_t*, Msg&,
329     const std::string&, const nfa_t&, dfa_t&);
330 
331 } // namespace re2c
332 
333