1 #include "src/util/c99_stdint.h"
2 #include <algorithm>
3 #include <memory>
4 #include <set>
5 #include <string>
6 #include <valarray>
7 #include <vector>
8
9 #include "src/options/opt.h"
10 #include "src/debug/debug.h"
11 #include "src/dfa/dfa.h"
12 #include "src/dfa/determinization.h"
13 #include "src/dfa/tcmd.h"
14 #include "src/msg/warn.h"
15 #include "src/nfa/nfa.h"
16 #include "src/regexp/rule.h"
17 #include "src/regexp/tag.h"
18 #include "src/util/range.h"
19
20
21 namespace re2c {
22
23 template<typename ctx_t> static void determinization(ctx_t &ctx);
24 template<typename ctx_t> static void clear_caches(ctx_t &ctx);
25 template<typename ctx_t> static void warn_nondeterministic_tags(const ctx_t &ctx);
26
27 const uint32_t dfa_t::NIL = ~0u;
28
dfa_t(const nfa_t & nfa,size_t def_rule,size_t eof_rule)29 dfa_t::dfa_t(const nfa_t &nfa, size_t def_rule, size_t eof_rule)
30 : states()
31 , nchars(nfa.charset.size() - 1) // (n + 1) bounds for n ranges
32 , charset(nfa.charset)
33 , rules(nfa.rules)
34 , tags(nfa.tags)
35 , mtagvers(*new std::set<tagver_t>)
36 , finvers(NULL)
37 , tcpool(*new tcpool_t)
38 , maxtagver(0)
39 , tcmd0(NULL)
40 , tcid0(TCID0)
41 , def_rule(def_rule)
42 , eof_rule(eof_rule)
43 {}
44
determinization(const nfa_t & nfa,dfa_t & dfa,const opt_t * opts,Msg & msg,const std::string & cond)45 void determinization(const nfa_t &nfa, dfa_t &dfa, const opt_t *opts, Msg &msg,
46 const std::string &cond)
47 {
48 if (opts->posix_semantics) {
49 pdetctx_t ctx(opts, msg, cond, nfa, dfa);
50 determinization(ctx);
51 }
52 else {
53 ldetctx_t ctx(opts, msg, cond, nfa, dfa);
54 determinization(ctx);
55 }
56 }
57
~dfa_t()58 dfa_t::~dfa_t()
59 {
60 std::vector<dfa_state_t*>::iterator
61 i = states.begin(),
62 e = states.end();
63 for (; i != e; ++i)
64 {
65 delete *i;
66 }
67 }
68
69 template<typename ctx_t>
determinization(ctx_t & ctx)70 void determinization(ctx_t &ctx)
71 {
72 const uint32_t INITIAL_TAGS = init_tag_versions(ctx);
73
74 // initial state
75 const clos_t c0(ctx.nfa.root, 0, INITIAL_TAGS, HROOT, HROOT);
76 ctx.reach.push_back(c0);
77 tagged_epsilon_closure(ctx);
78 find_state(ctx);
79
80 // iterate while new kernels are added: for each alphabet symbol,
81 // build tagged epsilon-closure of all reachable NFA states,
82 // then find identical or mappable DFA state or add a new one
83 for (uint32_t i = 0; i < ctx.dc_kernels.size(); ++i) {
84 ctx.dc_origin = i;
85 clear_caches(ctx);
86
87 for (uint32_t c = 0; c < ctx.dfa.nchars; ++c) {
88 reach_on_symbol(ctx, c);
89 tagged_epsilon_closure(ctx);
90 find_state(ctx);
91 }
92 }
93
94 warn_nondeterministic_tags(ctx);
95 }
96
97 template<typename ctx_t>
clear_caches(ctx_t & ctx)98 void clear_caches(ctx_t &ctx)
99 {
100 ctx.dc_newvers.clear();
101
102 const size_t ntags = ctx.nfa.tags.size();
103 for (size_t t = 0; t < ntags; ++t) {
104 ctx.dc_hc_caches[t].clear();
105 }
106 }
107
108 template<typename ctx_t>
reach_on_symbol(ctx_t & ctx,uint32_t sym)109 void reach_on_symbol(ctx_t &ctx, uint32_t sym)
110 {
111 ctx.dc_symbol = sym;
112 const uint32_t symbol = ctx.dfa.charset[sym];
113
114 const kernel_t *kernel = ctx.dc_kernels[ctx.dc_origin];
115 ctx.oldprectbl = kernel->prectbl;
116 ctx.oldprecdim = kernel->size;
117
118 closure_t &reach = ctx.reach;
119 reach.clear();
120
121 // Add configurations in reverse order: leftmost greedy closure uses
122 // the resulting array as stack, and POSIX closure doesn't care (GOR1
123 // pre-sorts configurations, and GTOP uses priority queue).
124 for (uint32_t i = static_cast<uint32_t>(kernel->size); i --> 0; ) {
125 nfa_state_t *s = transition(kernel->state[i], symbol);
126 if (s) {
127 const uint32_t v = ctx.dc_opts->stadfa ? 0 : kernel->tvers[i];
128 const clos_t c(s, i, v, kernel->thist[i], HROOT);
129 reach.push_back(c);
130 }
131 }
132 }
133
transition(nfa_state_t * state,uint32_t symbol)134 nfa_state_t *transition(nfa_state_t *state, uint32_t symbol)
135 {
136 if (state->type != nfa_state_t::RAN) {
137 return NULL;
138 }
139 for (const Range *r = state->ran.ran; r; r = r->next()) {
140 if ((r->lower() <= symbol) && (symbol < r->upper())) {
141 return state->ran.out;
142 }
143 }
144 return NULL;
145 }
146
147 template<typename ctx_t>
init_tag_versions(ctx_t & ctx)148 uint32_t init_tag_versions(ctx_t &ctx)
149 {
150 dfa_t &dfa = ctx.dfa;
151 const size_t ntags = dfa.tags.size();
152
153 // all-zero tag configuration must have static number zero
154 ctx.dc_tagvertbl.insert_const(TAGVER_ZERO);
155 DASSERT(ZERO_TAGS == ctx.dc_tagvertbl.insert_const(TAGVER_ZERO));
156
157 // initial tag versions: [1 .. N]
158 const tcid_t INITIAL_TAGS = ctx.dc_tagvertbl.insert_succ(1);
159
160 // other versions: [ .. -(N + 1)] and [N + 1 .. ]
161 dfa.maxtagver = static_cast<tagver_t>(ntags);
162
163 // final/fallback versions will be assigned on the go
164 dfa.finvers = new tagver_t[ntags];
165 for (size_t i = 0; i < ntags; ++i) {
166 dfa.finvers[i] = fixed(dfa.tags[i]) ? TAGVER_ZERO : ++dfa.maxtagver;
167 }
168
169 // mark tags with history (initial and final)
170 for (size_t i = 0; i < ntags; ++i) {
171 if (history(dfa.tags[i])) {
172 tagver_t v = static_cast<tagver_t>(i) + 1, f = dfa.finvers[i];
173 if (f != TAGVER_ZERO) {
174 dfa.mtagvers.insert(f);
175 }
176 dfa.mtagvers.insert(v);
177 }
178 }
179
180 return INITIAL_TAGS;
181 }
182
183 // For each tag, find maximal number of parallel versions of this tag
184 // used in each kernel (degree of non-determinism) and warn about tags with
185 // maximum degree two or more.
186 // WARNING: this function assumes that kernel items are grouped by rule
187 template<typename ctx_t>
warn_nondeterministic_tags(const ctx_t & ctx)188 void warn_nondeterministic_tags(const ctx_t &ctx)
189 {
190 if (ctx.dc_opts->posix_syntax
191 || ctx.dc_opts->stadfa) return;
192
193 Warn &warn = ctx.dc_msg.warn;
194 const kernels_t &kernels = ctx.dc_kernels;
195 const std::vector<Tag> &tags = ctx.dfa.tags;
196 const std::valarray<Rule> &rules = ctx.dfa.rules;
197
198 const size_t
199 ntag = tags.size(),
200 nkrn = kernels.size(),
201 nrule = rules.size();
202 std::vector<size_t> maxv(ntag, 0);
203 std::set<tagver_t> uniq;
204
205 for (uint32_t i = 0; i < nkrn; ++i) {
206 const kernel_t *k = kernels[i];
207 nfa_state_t **s = k->state;
208 const size_t n = k->size;
209 const uint32_t *v = k->tvers;
210
211 for (size_t u = 0; u < n;) {
212 const size_t r = s[u]->rule;
213 const Rule &rule = rules[r];
214
215 const size_t l = u;
216 for (; ++u < n && s[u]->rule == r;);
217 for (size_t t = rule.ltag; t < rule.htag; ++t) {
218 uniq.clear();
219 for (size_t m = l; m < u; ++m) {
220 uniq.insert(ctx.dc_tagvertbl[v[m]][t]);
221 }
222 maxv[t] = std::max(maxv[t], uniq.size());
223 }
224 }
225 }
226
227 for (uint32_t r = 0; r < nrule; ++r) {
228 const Rule &rule = rules[r];
229 for (size_t t = rule.ltag; t < rule.htag; ++t) {
230 const size_t m = maxv[t];
231 if (m > 1) {
232 warn.nondeterministic_tags(rule.semact->loc, ctx.dc_condname,
233 tags[t].name, m);
234 }
235 }
236 }
237 }
238
239 template<typename history_t>
determ_context_t(const opt_t * opts,Msg & msg,const std::string & condname,const nfa_t & nfa,dfa_t & dfa)240 determ_context_t<history_t>::determ_context_t(const opt_t *opts, Msg &msg
241 , const std::string &condname, const nfa_t &nfa, dfa_t &dfa)
242 : dc_opts(opts)
243 , dc_msg(msg)
244 , dc_condname(condname)
245 , nfa(nfa)
246 , dfa(dfa)
247 , dc_allocator()
248 , dc_origin(dfa_t::NIL)
249 , dc_target(dfa_t::NIL)
250 , dc_symbol(0)
251 , dc_actions(NULL)
252 , dc_tagvertbl(nfa.tags.size())
253 , history()
254 , dc_kernels()
255 , dc_buffers()
256 , dc_hc_caches()
257 , dc_newvers(newver_cmp_t<history_t>(history, dc_hc_caches))
258 , dc_path1()
259 , dc_path2()
260 , dc_path3()
261 , dc_tagcount()
262 , stadfa_actions(NULL)
263 , stadfa_tagvers()
264 , reach()
265 , state()
266 , gor1_topsort()
267 , gor1_linear()
268 , gtop_buffer()
269 , gtop_cmp()
270 , gtop_heap(gtop_cmp, gtop_buffer)
271 , newprectbl(NULL)
272 , oldprectbl(NULL)
273 , oldprecdim(0)
274 , histlevel(NULL)
275 , sortcores()
276 , fincount()
277 , worklist()
278 , dump_dfa_tree(*this)
279 , dc_dump(opts)
280 , dc_clstats()
281 {
282 const size_t nstates = nfa.size;
283 const size_t ncores = nfa.ncores;
284 const size_t ntags = nfa.tags.size();
285
286 reach.reserve(nstates);
287 state.reserve(nstates);
288
289 dc_hc_caches.resize(ntags);
290 dc_path1.reserve(ntags);
291 dc_path2.reserve(ntags);
292 dc_path3.reserve(ntags);
293 dc_tagcount.resize(ntags);
294
295 if (opts->posix_semantics) {
296 newprectbl = new prectable_t[ncores * ncores];
297 histlevel = new histleaf_t[ncores];
298 sortcores.reserve(ncores);
299 fincount.resize(ncores + 1);
300 worklist.reserve(nstates);
301 }
302
303 if (opts->posix_closure == POSIX_CLOSURE_GTOP) {
304 gtop_buffer.reserve(nstates);
305 }
306 else {
307 gor1_topsort.reserve(nstates);
308 gor1_linear.reserve(nstates);
309 }
310 }
311
312 template<typename history_t>
~determ_context_t()313 determ_context_t<history_t>::~determ_context_t()
314 {
315 delete[] newprectbl;
316 delete[] histlevel;
317 }
318
319 // explicit instantiation for context types
320 template void reach_on_symbol<ldetctx_t>(ldetctx_t &ctx, uint32_t sym);
321 template void reach_on_symbol<pdetctx_t>(pdetctx_t &ctx, uint32_t sym);
322 template uint32_t init_tag_versions<ldetctx_t>(ldetctx_t &ctx);
323 template uint32_t init_tag_versions<pdetctx_t>(pdetctx_t &ctx);
324 template determ_context_t<lhistory_t>::~determ_context_t();
325 template determ_context_t<phistory_t>::~determ_context_t();
326 template determ_context_t<lhistory_t>::determ_context_t(const opt_t*, Msg&,
327 const std::string&, const nfa_t&, dfa_t&);
328 template determ_context_t<phistory_t>::determ_context_t(const opt_t*, Msg&,
329 const std::string&, const nfa_t&, dfa_t&);
330
331 } // namespace re2c
332
333