1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the utils of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:GPL-EXCEPT$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
21 ** included in the packaging of this file. Please review the following
22 ** information to ensure the GNU General Public License requirements will
23 ** be met: https://www.gnu.org/licenses/gpl-3.0.html.
24 **
25 ** $QT_END_LICENSE$
26 **
27 ****************************************************************************/
28 #include "nfa.h"
29 #include <QSet>
30 #include <limits.h>
31
createSingleInputNFA(InputType input)32 NFA NFA::createSingleInputNFA(InputType input)
33 {
34 NFA result;
35 result.initialize(2);
36 result.addTransition(result.initialState, input, result.finalState);
37 return result;
38 }
39
createSymbolNFA(const QString & symbol)40 NFA NFA::createSymbolNFA(const QString &symbol)
41 {
42 NFA result = NFA::createSingleInputNFA(Epsilon);
43 result.states[result.finalState].symbol = symbol;
44 return result;
45 }
46
initialize(int size)47 void NFA::initialize(int size)
48 {
49 states.resize(size);
50 states.fill(State());
51 initialState = 0;
52 finalState = size - 1;
53 }
54
addTransition(int from,InputType input,int to)55 void NFA::addTransition(int from, InputType input, int to)
56 {
57 assertValidState(from);
58 assertValidState(to);
59
60 states[from].transitions.insertMulti(input, to);
61 }
62
copyFrom(const NFA & other,int baseState)63 void NFA::copyFrom(const NFA &other, int baseState)
64 {
65 assertValidState(baseState);
66 assertValidState(baseState + other.states.count() - 1);
67
68 for (int i = 0; i < other.states.count(); ++i) {
69 State s = other.states.at(i);
70
71 for (TransitionMap::Iterator it = s.transitions.begin(),
72 end = s.transitions.end(); it != end; ++it)
73 *it += baseState;
74
75 states[baseState + i] = s;
76 }
77 }
78
initializeFromPair(const NFA & a,const NFA & b,int * initialA,int * finalA,int * initialB,int * finalB)79 void NFA::initializeFromPair(const NFA &a, const NFA &b,
80 int *initialA, int *finalA,
81 int *initialB, int *finalB)
82 {
83 initialize(a.states.count() + b.states.count() + 2);
84
85 int baseIdxA = 1;
86 int baseIdxB = 1 + a.states.count();
87
88 *initialA = a.initialState + baseIdxA;
89 *finalA = a.finalState + baseIdxA;
90
91 *initialB = b.initialState + baseIdxB;
92 *finalB = b.finalState + baseIdxB;
93
94 copyFrom(a, baseIdxA);
95 copyFrom(b, baseIdxB);
96 }
97
createAlternatingNFA(const NFA & a,const NFA & b)98 NFA NFA::createAlternatingNFA(const NFA &a, const NFA &b)
99 {
100 NFA result;
101
102 int newInitialA, newFinalA,
103 newInitialB, newFinalB;
104
105 result.initializeFromPair(a, b, &newInitialA, &newFinalA,
106 &newInitialB, &newFinalB);
107
108 result.addTransition(result.initialState, Epsilon, newInitialA);
109 result.addTransition(result.initialState, Epsilon, newInitialB);
110
111 result.addTransition(newFinalA, Epsilon, result.finalState);
112 result.addTransition(newFinalB, Epsilon, result.finalState);
113
114 return result;
115 }
116
createConcatenatingNFA(const NFA & a,const NFA & b)117 NFA NFA::createConcatenatingNFA(const NFA &a, const NFA &b)
118 {
119 NFA result;
120
121 int initialA, finalA,
122 initialB, finalB;
123
124 result.initializeFromPair(a, b, &initialA, &finalA, &initialB, &finalB);
125
126 result.addTransition(result.initialState, Epsilon, initialA);
127 result.addTransition(finalA, Epsilon, initialB);
128 result.addTransition(finalB, Epsilon, result.finalState);
129 return result;
130 }
131
createOptionalNFA(const NFA & a)132 NFA NFA::createOptionalNFA(const NFA &a)
133 {
134 NFA result;
135
136 result.initialize(a.states.count() + 2);
137
138 int baseIdxA = 1;
139 int initialA = a.initialState + baseIdxA;
140 int finalA = a.finalState + baseIdxA;
141
142 result.copyFrom(a, baseIdxA);
143
144 result.addTransition(result.initialState, Epsilon, initialA);
145 result.addTransition(result.initialState, Epsilon, result.finalState);
146
147 result.addTransition(finalA, Epsilon, initialA);
148 result.addTransition(finalA, Epsilon, result.finalState);
149
150 return result;
151 }
152
createStringNFA(const QByteArray & str)153 NFA NFA::createStringNFA(const QByteArray &str)
154 {
155 NFA result;
156 foreach (char c, str) {
157 NFA ch = NFA::createSingleInputNFA(c);
158 if (result.isEmpty())
159 result = ch;
160 else
161 result = NFA::createConcatenatingNFA(result, ch);
162 }
163 return result;
164 }
165
createSetNFA(const QSet<InputType> & set)166 NFA NFA::createSetNFA(const QSet<InputType> &set)
167 {
168 NFA result;
169 result.initialize(set.count() + 2);
170
171 int state = 1;
172 for (QSet<InputType>::ConstIterator it = set.constBegin(), end = set.constEnd();
173 it != end; ++it, ++state) {
174 result.addTransition(result.initialState, Epsilon, state);
175 result.addTransition(state, *it, result.finalState);
176 }
177
178 /*
179 foreach (InputType input, set) {
180 NFA ch = NFA::createSingleInputNFA(input);
181 if (result.isEmpty())
182 result = ch;
183 else
184 result = NFA::createAlternatingNFA(result, ch);
185 }
186 */
187 return result;
188 }
189
createZeroOrOneNFA(const NFA & a)190 NFA NFA::createZeroOrOneNFA(const NFA &a)
191 {
192 NFA epsilonNFA = createSingleInputNFA(Epsilon);
193 return NFA::createAlternatingNFA(a, epsilonNFA);
194 }
195
applyQuantity(const NFA & a,int minOccurrences,int maxOccurrences)196 NFA NFA::applyQuantity(const NFA &a, int minOccurrences, int maxOccurrences)
197 {
198 NFA result = a;
199 NFA epsilonNFA = createSingleInputNFA(Epsilon);
200
201 if (minOccurrences == 0) {
202 result = NFA::createAlternatingNFA(result, epsilonNFA);
203 } else {
204 minOccurrences--;
205 }
206 maxOccurrences--;
207
208 for (int i = 0; i < minOccurrences; ++i)
209 result = NFA::createConcatenatingNFA(result, a);
210
211 for (int i = minOccurrences; i < maxOccurrences; ++i)
212 result = NFA::createConcatenatingNFA(result, NFA::createAlternatingNFA(a, epsilonNFA));
213
214 return result;
215 }
216
debug()217 void NFA::debug()
218 {
219 qDebug() << "NFA has" << states.count() << "states";
220 qDebug() << "initial state is" << initialState;
221 qDebug() << "final state is" << finalState;
222
223 for (int i = 0; i < states.count(); ++i) {
224 const State &s = states.at(i);
225 for (TransitionMap::ConstIterator it = s.transitions.constBegin(),
226 end = s.transitions.constEnd(); it != end; ++it)
227 qDebug() << "transition from state" << i << "to" << it.value() << "through"
228 << (it.key() == Epsilon ? QString("Epsilon") : QString(char(it.key())));
229 if (!s.symbol.isEmpty())
230 qDebug() << "State" << i << "leads to symbol" << s.symbol;
231 }
232 }
233
234 // helper
235 typedef QSet<int> DFAState;
236
237 // that's a bad hash, but it's good enough for us
238 // and it allows us to use the nice QHash API :)
qHash(const DFAState & state)239 inline uint qHash(const DFAState &state)
240 {
241 uint val = 0;
242 foreach (int s, state)
243 val |= qHash(s);
244 return val;
245 }
246
toDFA() const247 DFA NFA::toDFA() const
248 {
249 DFA result;
250 result.reserve(states.count());
251
252 QHash<QString, int> symbolReferenceCounts;
253 {
254 QSet<int> symbolStates;
255 for (int i = 0; i < states.count(); ++i)
256 if (!states.at(i).symbol.isEmpty())
257 symbolStates.insert(i);
258
259 QHash<int, QString> epsilonStates;
260 for (int i = 0; i < states.count(); ++i) {
261 const State &s = states.at(i);
262 for (TransitionMap::ConstIterator transition = s.transitions.constBegin(), end = s.transitions.constEnd();
263 transition != end; ++transition)
264 if (transition.key() == Epsilon && symbolStates.contains(transition.value()))
265 epsilonStates.insert(i, states.at(transition.value()).symbol);
266 }
267
268 int lastCount;
269 do {
270 lastCount = epsilonStates.count();
271 for (int i = 0; i < states.count(); ++i) {
272 const State &s = states.at(i);
273 for (TransitionMap::ConstIterator transition = s.transitions.constBegin(), end = s.transitions.constEnd();
274 transition != end; ++transition)
275 if (transition.key() == Epsilon && epsilonStates.contains(transition.value()))
276 epsilonStates.insert(i, epsilonStates.value(transition.value()));
277 }
278 } while (lastCount != epsilonStates.count());
279
280 for (int i = 0; i < states.count(); ++i) {
281 const State &s = states.at(i);
282 for (TransitionMap::ConstIterator transition = s.transitions.constBegin(), end = s.transitions.constEnd();
283 transition != end; ++transition) {
284 if (transition.key() == Epsilon)
285 continue;
286 if (symbolStates.contains(transition.value())) {
287 const QString symbol = states.at(transition.value()).symbol;
288 symbolReferenceCounts[symbol]++;
289 } else if (epsilonStates.contains(transition.value())) {
290 const QString symbol = epsilonStates.value(transition.value());
291 symbolReferenceCounts[symbol]++;
292 }
293 }
294 }
295 /*
296 for (QHash<QString, int>::ConstIterator symIt = symbolReferenceCounts.constBegin(), symEnd = symbolReferenceCounts.constEnd();
297 symIt != symEnd; ++symIt)
298 qDebug() << "symbol" << symIt.key() << "is reached" << symIt.value() << "times";
299 */
300 }
301
302 QSet<InputType> validInput;
303 foreach (const State &s, states)
304 for (TransitionMap::ConstIterator it = s.transitions.constBegin(),
305 end = s.transitions.constEnd(); it != end; ++it)
306 if (it.key() != Epsilon)
307 validInput.insert(it.key());
308
309 // A DFA state can consist of multiple NFA states.
310 // the dfaStateMap maps from these to the actual
311 // state index within the resulting DFA vector
312 QHash<DFAState, int> dfaStateMap;
313 QStack<DFAState> pendingDFAStates;
314
315 DFAState startState = epsilonClosure(QSet<int>() << initialState);
316
317 result.resize(1);
318 dfaStateMap.insert(startState, 0);
319
320 pendingDFAStates.push(startState);
321
322 while (!pendingDFAStates.isEmpty()) {
323 DFAState state = pendingDFAStates.pop();
324 // qDebug() << "processing" << state << "from the stack of pending states";
325
326 foreach (InputType input, validInput) {
327
328 QSet<int> reachableStates;
329
330 foreach (int nfaState, state) {
331 const TransitionMap &transitions = states.at(nfaState).transitions;
332 TransitionMap::ConstIterator it = transitions.find(input);
333 while (it != transitions.constEnd() && it.key() == input) {
334 reachableStates.insert(it.value());
335 ++it;
336 }
337 }
338
339 if (reachableStates.isEmpty())
340 continue;
341
342 // qDebug() << "can reach" << reachableStates << "from input" << char(input);
343
344 QSet<int> closure = epsilonClosure(reachableStates);
345
346 // qDebug() << "closure is" << closure;
347
348 if (!dfaStateMap.contains(closure)) {
349 int dfaState = result.count();
350 result.append(State());
351
352 QString symbol;
353 int refCount = INT_MAX;
354 foreach (int nfaState, closure)
355 if (!states.at(nfaState).symbol.isEmpty()) {
356 // qDebug() << "closure also contains symbol" << states.at(nfaState).symbol;
357 QString candidate = states.at(nfaState).symbol;
358 int candidateRefCount =symbolReferenceCounts.value(candidate, INT_MAX);
359 if (candidateRefCount < refCount) {
360 refCount = candidateRefCount;
361 symbol = candidate;
362 }
363 }
364 if (!symbol.isEmpty())
365 result.last().symbol = symbol;
366
367 dfaStateMap.insert(closure, dfaState);
368
369 Q_ASSERT(!pendingDFAStates.contains(closure));
370 pendingDFAStates.prepend(closure);
371 }
372
373 result[dfaStateMap.value(state)].transitions.insert(input, dfaStateMap.value(closure));
374 }
375 }
376
377 return result;
378 }
379
epsilonClosure(const QSet<int> & initialClosure) const380 QSet<int> NFA::epsilonClosure(const QSet<int> &initialClosure) const
381 {
382 QSet<int> closure = initialClosure;
383 closure.reserve(closure.count() * 4);
384
385 QStack<int> stateStack;
386 stateStack.resize(closure.count());
387 std::copy(closure.constBegin(), closure.constEnd(), stateStack.begin());
388
389 while (!stateStack.isEmpty()) {
390 int t = stateStack.pop();
391 const TransitionMap &transitions = states.at(t).transitions;
392 TransitionMap::ConstIterator it = transitions.find(Epsilon);
393 while (it != transitions.constEnd() && it.key() == Epsilon) {
394 const int u = it.value();
395 if (!closure.contains(u)) {
396 closure.insert(u);
397 stateStack.push(u);
398 }
399 ++it;
400 }
401 }
402
403 return closure;
404 }
405
setTerminationSymbol(const QString & symbol)406 void NFA::setTerminationSymbol(const QString &symbol)
407 {
408 states[finalState].symbol = symbol;
409 }
410
debug() const411 void DFA::debug() const
412 {
413 qDebug() << "DFA has" << count() << "states";
414
415 for (int i = 0; i < count(); ++i) {
416 const State &s = at(i);
417 if (s.transitions.isEmpty()) {
418 qDebug() << "State" << i << "has no transitions";
419 } else {
420 for (TransitionMap::ConstIterator it = s.transitions.constBegin(),
421 end = s.transitions.constEnd(); it != end; ++it)
422 qDebug() << "transition from state" << i << "to" << it.value() << "through"
423 << (it.key() == Epsilon ? QString("Epsilon") : QString(char(it.key())));
424 }
425 if (!s.symbol.isEmpty())
426 qDebug() << "State" << i << "leads to symbol" << s.symbol;
427 }
428
429 }
430
minimize() const431 DFA DFA::minimize() const
432 {
433 QVector<bool> inequivalentStates(count() * count());
434 inequivalentStates.fill(false);
435
436 for (int i = 0; i < count(); ++i)
437 for (int j = 0; j < i; ++j) {
438 if (i != j && at(i).symbol != at(j).symbol)
439 inequivalentStates[i * count() + j] = true;
440 }
441
442 bool done;
443 do {
444 done = true;
445 for (int i = 0; i < count(); ++i)
446 for (int j = 0; j < count(); ++j) {
447 if (i == j)
448 continue;
449
450 if (inequivalentStates[i * count() + j])
451 continue;
452
453 if (at(i).transitions.keys() != at(j).transitions.keys()) {
454 inequivalentStates[i * count() + j] = true;
455 done = false;
456 continue;
457 }
458
459 foreach (InputType a, at(i).transitions.keys()) {
460 int r = at(i).transitions.value(a, -1);
461 if (r == -1)
462 continue;
463 int s = at(j).transitions.value(a, -1);
464 if (s == -1)
465 continue;
466
467 if (inequivalentStates[r * count() + s]
468 || r == s) {
469 inequivalentStates[i * count() + j] = true;
470 done = false;
471 break;
472 }
473 }
474 }
475 } while (!done);
476
477 QHash<int, int> statesToEliminate;
478 for (int i = 0; i < count(); ++i)
479 for (int j = 0; j < i; ++j)
480 if (!inequivalentStates[i * count() + j]) {
481 statesToEliminate.insertMulti(i, j);
482 }
483
484 /*
485 qDebug() << "states to eliminiate:" << statesToEliminate.count();;
486 qDebug() << "merging" << statesToEliminate;
487 debug();
488 */
489
490 return *this;
491 }
492