1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the utils of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:GPL-EXCEPT$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
21 ** included in the packaging of this file. Please review the following
22 ** information to ensure the GNU General Public License requirements will
23 ** be met: https://www.gnu.org/licenses/gpl-3.0.html.
24 **
25 ** $QT_END_LICENSE$
26 **
27 ****************************************************************************/
28 #include "nfa.h"
29 #include <QSet>
30 #include <limits.h>
31 
createSingleInputNFA(InputType input)32 NFA NFA::createSingleInputNFA(InputType input)
33 {
34     NFA result;
35     result.initialize(2);
36     result.addTransition(result.initialState, input, result.finalState);
37     return result;
38 }
39 
createSymbolNFA(const QString & symbol)40 NFA NFA::createSymbolNFA(const QString &symbol)
41 {
42     NFA result = NFA::createSingleInputNFA(Epsilon);
43     result.states[result.finalState].symbol = symbol;
44     return result;
45 }
46 
initialize(int size)47 void NFA::initialize(int size)
48 {
49     states.resize(size);
50     states.fill(State());
51     initialState = 0;
52     finalState = size - 1;
53 }
54 
addTransition(int from,InputType input,int to)55 void NFA::addTransition(int from, InputType input, int to)
56 {
57     assertValidState(from);
58     assertValidState(to);
59 
60     states[from].transitions.insertMulti(input, to);
61 }
62 
copyFrom(const NFA & other,int baseState)63 void NFA::copyFrom(const NFA &other, int baseState)
64 {
65     assertValidState(baseState);
66     assertValidState(baseState + other.states.count() - 1);
67 
68     for (int i = 0; i < other.states.count(); ++i) {
69         State s = other.states.at(i);
70 
71         for (TransitionMap::Iterator it = s.transitions.begin(),
72              end = s.transitions.end(); it != end; ++it)
73             *it += baseState;
74 
75         states[baseState + i] = s;
76     }
77 }
78 
initializeFromPair(const NFA & a,const NFA & b,int * initialA,int * finalA,int * initialB,int * finalB)79 void NFA::initializeFromPair(const NFA &a, const NFA &b,
80                              int *initialA, int *finalA,
81                              int *initialB, int *finalB)
82 {
83     initialize(a.states.count() + b.states.count() + 2);
84 
85     int baseIdxA = 1;
86     int baseIdxB = 1 + a.states.count();
87 
88     *initialA = a.initialState + baseIdxA;
89     *finalA = a.finalState + baseIdxA;
90 
91     *initialB = b.initialState + baseIdxB;
92     *finalB = b.finalState + baseIdxB;
93 
94     copyFrom(a, baseIdxA);
95     copyFrom(b, baseIdxB);
96 }
97 
createAlternatingNFA(const NFA & a,const NFA & b)98 NFA NFA::createAlternatingNFA(const NFA &a, const NFA &b)
99 {
100     NFA result;
101 
102     int newInitialA, newFinalA,
103         newInitialB, newFinalB;
104 
105     result.initializeFromPair(a, b, &newInitialA, &newFinalA,
106                               &newInitialB, &newFinalB);
107 
108     result.addTransition(result.initialState, Epsilon, newInitialA);
109     result.addTransition(result.initialState, Epsilon, newInitialB);
110 
111     result.addTransition(newFinalA, Epsilon, result.finalState);
112     result.addTransition(newFinalB, Epsilon, result.finalState);
113 
114     return result;
115 }
116 
createConcatenatingNFA(const NFA & a,const NFA & b)117 NFA NFA::createConcatenatingNFA(const NFA &a, const NFA &b)
118 {
119     NFA result;
120 
121     int initialA, finalA,
122         initialB, finalB;
123 
124     result.initializeFromPair(a, b, &initialA, &finalA, &initialB, &finalB);
125 
126     result.addTransition(result.initialState, Epsilon, initialA);
127     result.addTransition(finalA, Epsilon, initialB);
128     result.addTransition(finalB, Epsilon, result.finalState);
129     return result;
130 }
131 
createOptionalNFA(const NFA & a)132 NFA NFA::createOptionalNFA(const NFA &a)
133 {
134     NFA result;
135 
136     result.initialize(a.states.count() + 2);
137 
138     int baseIdxA = 1;
139     int initialA = a.initialState + baseIdxA;
140     int finalA = a.finalState + baseIdxA;
141 
142     result.copyFrom(a, baseIdxA);
143 
144     result.addTransition(result.initialState, Epsilon, initialA);
145     result.addTransition(result.initialState, Epsilon, result.finalState);
146 
147     result.addTransition(finalA, Epsilon, initialA);
148     result.addTransition(finalA, Epsilon, result.finalState);
149 
150     return result;
151 }
152 
createStringNFA(const QByteArray & str)153 NFA NFA::createStringNFA(const QByteArray &str)
154 {
155     NFA result;
156     foreach (char c, str) {
157         NFA ch = NFA::createSingleInputNFA(c);
158         if (result.isEmpty())
159             result = ch;
160         else
161             result = NFA::createConcatenatingNFA(result, ch);
162     }
163     return result;
164 }
165 
createSetNFA(const QSet<InputType> & set)166 NFA NFA::createSetNFA(const QSet<InputType> &set)
167 {
168     NFA result;
169     result.initialize(set.count() + 2);
170 
171     int state = 1;
172     for (QSet<InputType>::ConstIterator it = set.constBegin(), end = set.constEnd();
173          it != end; ++it, ++state) {
174         result.addTransition(result.initialState, Epsilon, state);
175         result.addTransition(state, *it, result.finalState);
176     }
177 
178     /*
179     foreach (InputType input, set) {
180         NFA ch = NFA::createSingleInputNFA(input);
181         if (result.isEmpty())
182             result = ch;
183         else
184             result = NFA::createAlternatingNFA(result, ch);
185     }
186     */
187     return result;
188 }
189 
createZeroOrOneNFA(const NFA & a)190 NFA NFA::createZeroOrOneNFA(const NFA &a)
191 {
192     NFA epsilonNFA = createSingleInputNFA(Epsilon);
193     return NFA::createAlternatingNFA(a, epsilonNFA);
194 }
195 
applyQuantity(const NFA & a,int minOccurrences,int maxOccurrences)196 NFA NFA::applyQuantity(const NFA &a, int minOccurrences, int maxOccurrences)
197 {
198     NFA result = a;
199     NFA epsilonNFA = createSingleInputNFA(Epsilon);
200 
201     if (minOccurrences == 0) {
202         result = NFA::createAlternatingNFA(result, epsilonNFA);
203     } else {
204         minOccurrences--;
205     }
206     maxOccurrences--;
207 
208     for (int i = 0; i < minOccurrences; ++i)
209         result = NFA::createConcatenatingNFA(result, a);
210 
211     for (int i = minOccurrences; i < maxOccurrences; ++i)
212         result = NFA::createConcatenatingNFA(result, NFA::createAlternatingNFA(a, epsilonNFA));
213 
214     return result;
215 }
216 
debug()217 void NFA::debug()
218 {
219     qDebug() << "NFA has" << states.count() << "states";
220     qDebug() << "initial state is" << initialState;
221     qDebug() << "final state is" << finalState;
222 
223     for (int i = 0; i < states.count(); ++i) {
224         const State &s = states.at(i);
225         for (TransitionMap::ConstIterator it = s.transitions.constBegin(),
226              end = s.transitions.constEnd(); it != end; ++it)
227             qDebug() << "transition from state" << i << "to" << it.value() << "through"
228                      << (it.key() == Epsilon ? QString("Epsilon") : QString(char(it.key())));
229         if (!s.symbol.isEmpty())
230             qDebug() << "State" << i << "leads to symbol" << s.symbol;
231     }
232 }
233 
234 // helper
235 typedef QSet<int> DFAState;
236 
237 // that's a bad hash, but it's good enough for us
238 // and it allows us to use the nice QHash API :)
qHash(const DFAState & state)239 inline uint qHash(const DFAState &state)
240 {
241     uint val = 0;
242     foreach (int s, state)
243         val |= qHash(s);
244     return val;
245 }
246 
toDFA() const247 DFA NFA::toDFA() const
248 {
249     DFA result;
250     result.reserve(states.count());
251 
252     QHash<QString, int> symbolReferenceCounts;
253     {
254         QSet<int> symbolStates;
255         for (int i = 0; i < states.count(); ++i)
256             if (!states.at(i).symbol.isEmpty())
257                 symbolStates.insert(i);
258 
259         QHash<int, QString> epsilonStates;
260         for (int i = 0; i < states.count(); ++i) {
261             const State &s = states.at(i);
262             for (TransitionMap::ConstIterator transition = s.transitions.constBegin(), end = s.transitions.constEnd();
263                  transition != end; ++transition)
264                 if (transition.key() == Epsilon && symbolStates.contains(transition.value()))
265                     epsilonStates.insert(i, states.at(transition.value()).symbol);
266         }
267 
268         int lastCount;
269         do {
270             lastCount = epsilonStates.count();
271             for (int i = 0; i < states.count(); ++i) {
272                 const State &s = states.at(i);
273                 for (TransitionMap::ConstIterator transition = s.transitions.constBegin(), end = s.transitions.constEnd();
274                      transition != end; ++transition)
275                     if (transition.key() == Epsilon && epsilonStates.contains(transition.value()))
276                         epsilonStates.insert(i, epsilonStates.value(transition.value()));
277             }
278         } while (lastCount != epsilonStates.count());
279 
280         for (int i = 0; i < states.count(); ++i) {
281             const State &s = states.at(i);
282             for (TransitionMap::ConstIterator transition = s.transitions.constBegin(), end = s.transitions.constEnd();
283                  transition != end; ++transition) {
284                 if (transition.key() == Epsilon)
285                     continue;
286                 if (symbolStates.contains(transition.value())) {
287                     const QString symbol = states.at(transition.value()).symbol;
288                     symbolReferenceCounts[symbol]++;
289                 } else if (epsilonStates.contains(transition.value())) {
290                     const QString symbol = epsilonStates.value(transition.value());
291                     symbolReferenceCounts[symbol]++;
292                 }
293             }
294         }
295         /*
296         for (QHash<QString, int>::ConstIterator symIt = symbolReferenceCounts.constBegin(), symEnd = symbolReferenceCounts.constEnd();
297              symIt != symEnd; ++symIt)
298             qDebug() << "symbol" << symIt.key() << "is reached" << symIt.value() << "times";
299             */
300     }
301 
302     QSet<InputType> validInput;
303     foreach (const State &s, states)
304         for (TransitionMap::ConstIterator it = s.transitions.constBegin(),
305              end = s.transitions.constEnd(); it != end; ++it)
306             if (it.key() != Epsilon)
307                 validInput.insert(it.key());
308 
309     // A DFA state can consist of multiple NFA states.
310     // the dfaStateMap maps from these to the actual
311     // state index within the resulting DFA vector
312     QHash<DFAState, int> dfaStateMap;
313     QStack<DFAState> pendingDFAStates;
314 
315     DFAState startState = epsilonClosure(QSet<int>() << initialState);
316 
317     result.resize(1);
318     dfaStateMap.insert(startState, 0);
319 
320     pendingDFAStates.push(startState);
321 
322     while (!pendingDFAStates.isEmpty()) {
323         DFAState state = pendingDFAStates.pop();
324 //        qDebug() << "processing" << state << "from the stack of pending states";
325 
326         foreach (InputType input, validInput) {
327 
328             QSet<int> reachableStates;
329 
330             foreach (int nfaState, state) {
331                 const TransitionMap &transitions = states.at(nfaState).transitions;
332                 TransitionMap::ConstIterator it = transitions.find(input);
333                 while (it != transitions.constEnd() && it.key() == input) {
334                     reachableStates.insert(it.value());
335                     ++it;
336                 }
337             }
338 
339             if (reachableStates.isEmpty())
340                 continue;
341 
342 //            qDebug() << "can reach" << reachableStates << "from input" << char(input);
343 
344             QSet<int> closure = epsilonClosure(reachableStates);
345 
346 //            qDebug() << "closure is" << closure;
347 
348             if (!dfaStateMap.contains(closure)) {
349                 int dfaState = result.count();
350                 result.append(State());
351 
352                 QString symbol;
353                 int refCount = INT_MAX;
354                 foreach (int nfaState, closure)
355                     if (!states.at(nfaState).symbol.isEmpty()) {
356 //                        qDebug() << "closure also contains symbol" << states.at(nfaState).symbol;
357                         QString candidate = states.at(nfaState).symbol;
358                         int candidateRefCount =symbolReferenceCounts.value(candidate, INT_MAX);
359                         if (candidateRefCount < refCount) {
360                             refCount = candidateRefCount;
361                             symbol = candidate;
362                         }
363                     }
364                 if (!symbol.isEmpty())
365                     result.last().symbol = symbol;
366 
367                 dfaStateMap.insert(closure, dfaState);
368 
369                 Q_ASSERT(!pendingDFAStates.contains(closure));
370                 pendingDFAStates.prepend(closure);
371             }
372 
373             result[dfaStateMap.value(state)].transitions.insert(input, dfaStateMap.value(closure));
374         }
375     }
376 
377     return result;
378 }
379 
epsilonClosure(const QSet<int> & initialClosure) const380 QSet<int> NFA::epsilonClosure(const QSet<int> &initialClosure) const
381 {
382     QSet<int> closure = initialClosure;
383     closure.reserve(closure.count() * 4);
384 
385     QStack<int> stateStack;
386     stateStack.resize(closure.count());
387     std::copy(closure.constBegin(), closure.constEnd(), stateStack.begin());
388 
389     while (!stateStack.isEmpty()) {
390         int t = stateStack.pop();
391         const TransitionMap &transitions = states.at(t).transitions;
392         TransitionMap::ConstIterator it = transitions.find(Epsilon);
393         while (it != transitions.constEnd() && it.key() == Epsilon) {
394             const int u = it.value();
395             if (!closure.contains(u)) {
396                 closure.insert(u);
397                 stateStack.push(u);
398             }
399             ++it;
400         }
401     }
402 
403     return closure;
404 }
405 
setTerminationSymbol(const QString & symbol)406 void NFA::setTerminationSymbol(const QString &symbol)
407 {
408     states[finalState].symbol = symbol;
409 }
410 
debug() const411 void DFA::debug() const
412 {
413     qDebug() << "DFA has" << count() << "states";
414 
415     for (int i = 0; i < count(); ++i) {
416         const State &s = at(i);
417         if (s.transitions.isEmpty()) {
418             qDebug() << "State" << i << "has no transitions";
419         } else {
420             for (TransitionMap::ConstIterator it = s.transitions.constBegin(),
421                  end = s.transitions.constEnd(); it != end; ++it)
422                 qDebug() << "transition from state" << i << "to" << it.value() << "through"
423                          << (it.key() == Epsilon ? QString("Epsilon") : QString(char(it.key())));
424         }
425         if (!s.symbol.isEmpty())
426             qDebug() << "State" << i << "leads to symbol" << s.symbol;
427     }
428 
429 }
430 
minimize() const431 DFA DFA::minimize() const
432 {
433     QVector<bool> inequivalentStates(count() * count());
434     inequivalentStates.fill(false);
435 
436     for (int i = 0; i < count(); ++i)
437         for (int j = 0; j < i; ++j) {
438             if (i != j && at(i).symbol != at(j).symbol)
439                 inequivalentStates[i * count() + j] = true;
440         }
441 
442     bool done;
443     do {
444         done = true;
445         for (int i = 0; i < count(); ++i)
446             for (int j = 0; j < count(); ++j) {
447                 if (i == j)
448                     continue;
449 
450                 if (inequivalentStates[i * count() + j])
451                     continue;
452 
453                 if (at(i).transitions.keys() != at(j).transitions.keys()) {
454                     inequivalentStates[i * count() + j] = true;
455                     done = false;
456                     continue;
457                 }
458 
459                 foreach (InputType a, at(i).transitions.keys()) {
460                     int r = at(i).transitions.value(a, -1);
461                     if (r == -1)
462                         continue;
463                     int s = at(j).transitions.value(a, -1);
464                     if (s == -1)
465                         continue;
466 
467                     if (inequivalentStates[r * count() + s]
468                         || r == s) {
469                         inequivalentStates[i * count() + j] = true;
470                         done = false;
471                         break;
472                     }
473                 }
474             }
475     } while (!done);
476 
477     QHash<int, int> statesToEliminate;
478     for (int i = 0; i < count(); ++i)
479         for (int j = 0; j < i; ++j)
480             if (!inequivalentStates[i * count() + j]) {
481                 statesToEliminate.insertMulti(i, j);
482             }
483 
484     /*
485     qDebug() << "states to eliminiate:" << statesToEliminate.count();;
486     qDebug() << "merging" << statesToEliminate;
487     debug();
488     */
489 
490     return *this;
491 }
492