1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the utils of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:GPL-EXCEPT$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
21 ** included in the packaging of this file. Please review the following
22 ** information to ensure the GNU General Public License requirements will
23 ** be met: https://www.gnu.org/licenses/gpl-3.0.html.
24 **
25 ** $QT_END_LICENSE$
26 **
27 ****************************************************************************/
28 
29 #include "generator.h"
30 
31 #include <QFile>
32 #include <QDir>
33 
printDeclaration(CodeBlock & block,const QString & funcNamePrefix) const34 void Function::printDeclaration(CodeBlock &block, const QString &funcNamePrefix) const
35 {
36     block << (iline ? "inline " : "") << signature(funcNamePrefix) << (iline ? QLatin1String(" {") : QLatin1String(";"));
37     if (!iline)
38         return;
39 
40     block.indent();
41     QString tmp = body;
42     if (tmp.endsWith(QLatin1Char('\n')))
43         tmp.chop(1);
44     foreach (QString line, tmp.split(QLatin1Char('\n')))
45         block << line;
46     block.outdent();
47     block << "}";
48 }
49 
// Returns "<return type> <prefix><name>" with " const" appended for const
// functions. The return type and its separating space are left out when no
// return type is set (as is the case for constructors built by
// Class::addConstructor()).
QString Function::signature(const QString &funcNamePrefix) const
{
    QString result = funcNamePrefix;
    result.append(fname);

    if (!rtype.isEmpty()) {
        result.prepend(QLatin1Char(' '));
        result.prepend(rtype);
    }

    if (cnst)
        result.append(" const");

    return result;
}
63 
definition() const64 QString Function::definition() const
65 {
66     if (iline)
67         return QString();
68 
69     QString result;
70     result += signature();
71     result += QLatin1String("\n{\n");
72 
73     QString tmp = body;
74 
75     if (tmp.endsWith(QLatin1Char('\n')))
76         tmp.chop(1);
77     if (!tmp.startsWith(QLatin1Char('\n')))
78         tmp.prepend("    ");
79 
80     tmp.replace(QLatin1Char('\n'), QLatin1String("\n    "));
81 
82     result += tmp;
83 
84     result += QLatin1String("\n}\n");
85 
86     return result;
87 }
88 
printDeclaration(const Class * klass,CodeBlock & block) const89 void Class::Section::printDeclaration(const Class *klass, CodeBlock &block) const
90 {
91     foreach (Function ctor, constructors)
92         ctor.printDeclaration(block, klass->name());
93 
94     if (!constructors.isEmpty())
95         block.addNewLine();
96 
97     foreach (Function func, functions)
98         func.printDeclaration(block);
99 
100     if (!functions.isEmpty())
101         block.addNewLine();
102 
103     foreach (QString var, variables)
104         block << var << ';';
105 }
106 
addConstructor(Access access,const QString & body,const QString & _args)107 void Class::addConstructor(Access access, const QString &body, const QString &_args)
108 {
109     Function ctor;
110     QString args = _args;
111     if (!args.startsWith(QLatin1Char('('))
112         && !args.endsWith(QLatin1Char(')'))) {
113         args.prepend('(');
114         args.append(')');
115     }
116     ctor.setName(args);
117     ctor.addBody(body);
118     sections[access].constructors.append(ctor);
119 }
120 
definition(const Class * klass) const121 QString Class::Section::definition(const Class *klass) const
122 {
123     QString result;
124 
125     foreach (Function ctor, constructors) {
126         ctor.setName(klass->name() + "::" + klass->name() + ctor.name());
127         result += ctor.definition();
128         result += QLatin1Char('\n');
129     }
130 
131     foreach (Function func, functions) {
132         if (!func.hasBody()) continue;
133         func.setName(klass->name() + "::" + func.name());
134         result += func.definition();
135         result += QLatin1Char('\n');
136     }
137 
138     return result;
139 }
140 
declaration() const141 QString Class::declaration() const
142 {
143     CodeBlock block;
144 
145     block << QLatin1String("class ") << cname;
146     block << "{";
147 
148     if (!sections[PublicMember].isEmpty()) {
149         block << "public:";
150         block.indent();
151         sections[PublicMember].printDeclaration(this, block);
152         block.outdent();
153     }
154 
155     if (!sections[ProtectedMember].isEmpty()) {
156         block << "protected:";
157         block.indent();
158         sections[ProtectedMember].printDeclaration(this, block);
159         block.outdent();
160     }
161 
162     if (!sections[PrivateMember].isEmpty()) {
163         block << "private:";
164         block.indent();
165         sections[PrivateMember].printDeclaration(this, block);
166         block.outdent();
167     }
168 
169     block << "};";
170     block.addNewLine();
171 
172     return block.toString();
173 }
174 
definition() const175 QString Class::definition() const
176 {
177     return sections[PrivateMember].definition(this)
178            + sections[ProtectedMember].definition(this)
179            + sections[PublicMember].definition(this);
180 }
181 
Generator(const DFA & _dfa,const Config & config)182 Generator::Generator(const DFA &_dfa, const Config &config)
183      : dfa(_dfa), cfg(config)
184 {
185     QList<InputType> lst = cfg.maxInputSet.toList();
186     std::sort(lst.begin(), lst.end());
187     minInput = lst.first();
188     maxInput = lst.last();
189 
190     ConfigFile::Section section = config.configSections.value("Code Generator Options");
191 
192     foreach (ConfigFile::Entry entry, section) {
193         if (!entry.key.startsWith(QLatin1String("MapToCode["))
194             || !entry.key.endsWith(QLatin1Char(']')))
195             continue;
196         QString range = entry.key;
197         range.remove(0, qstrlen("MapToCode["));
198         range.chop(1);
199         if (range.length() != 3
200             || range.at(1) != QLatin1Char('-')) {
201             qWarning("Invalid range for char mapping function: %s", qPrintable(range));
202             continue;
203         }
204         TransitionSequence seq;
205         seq.first = range.at(0).unicode();
206         seq.last = range.at(2).unicode();
207         seq.testFunction = entry.value;
208         charFunctionRanges.append(seq);
209     }
210 
211     QString tokenPrefix = section.value("TokenPrefix");
212     if (!tokenPrefix.isEmpty()) {
213         for (int i = 0; i < dfa.count(); ++i)
214             if (!dfa.at(i).symbol.isEmpty()
215                 && !dfa.at(i).symbol.endsWith(QLatin1String("()")))
216                 dfa[i].symbol.prepend(tokenPrefix);
217     }
218 
219     headerFileName = section.value("FileHeader");
220 }
221 
// Returns true when \a right is the direct successor of \a left.
static inline bool adjacentKeys(int left, int right)
{
    return right == left + 1;
}
//static inline bool adjacentKeys(const InputType &left, const InputType &right)
//{ return left.val + 1 == right.val; }
225 
convertToSequences(const TransitionMap & transitions)226 static QVector<Generator::TransitionSequence> convertToSequences(const TransitionMap &transitions)
227 {
228     QVector<Generator::TransitionSequence> sequences;
229     if (transitions.isEmpty())
230         return sequences;
231 
232     QList<InputType> keys = transitions.keys();
233     std::sort(keys.begin(), keys.end());
234     int i = 0;
235     Generator::TransitionSequence sequence;
236     sequence.first = keys.at(0);
237     ++i;
238     for (; i < keys.count(); ++i) {
239         if (adjacentKeys(keys.at(i - 1), keys.at(i))
240             && transitions.value(keys.at(i)) == transitions.value(keys.at(i - 1))) {
241             continue;
242         }
243         sequence.last = keys.at(i - 1);
244         sequence.transition = transitions.value(sequence.last);
245         sequences.append(sequence);
246 
247         sequence.first = keys.at(i);
248     }
249     sequence.last = keys.at(i - 1);
250     sequence.transition = transitions.value(sequence.last);
251     sequences.append(sequence);
252 
253     return sequences;
254 }
255 
operator <<(QDebug & debug,const Generator::TransitionSequence & seq)256 QDebug &operator<<(QDebug &debug, const Generator::TransitionSequence &seq)
257 {
258     return debug << "[first:" << seq.first << "; last:" << seq.last << "; transition:" << seq.transition
259                  << (seq.testFunction.isEmpty() ? QString() : QString(QString("; testfunction:" + seq.testFunction)))
260                  << "]";
261 }
262 
isSingleReferencedFinalState(int i) const263 bool Generator::isSingleReferencedFinalState(int i) const
264 {
265     return backReferenceMap.value(i) == 1
266            && dfa.at(i).transitions.isEmpty()
267            && !dfa.at(i).symbol.isEmpty();
268 }
269 
generateTransitions(CodeBlock & body,const TransitionMap & transitions)270 void Generator::generateTransitions(CodeBlock &body, const TransitionMap &transitions)
271 {
272     if (transitions.isEmpty())
273         return;
274 
275     QVector<TransitionSequence> sequences = convertToSequences(transitions);
276 
277     bool needsCharFunction = false;
278     if (!charFunctionRanges.isEmpty()) {
279         int i = 0;
280         while (i < sequences.count()) {
281             const TransitionSequence &seq = sequences.at(i);
282             if (!seq.testFunction.isEmpty()) {
283                 ++i;
284                 continue;
285             }
286 
287             foreach (TransitionSequence range, charFunctionRanges)
288                 if (range.first >= seq.first && range.last <= seq.last) {
289                     needsCharFunction = true;
290 
291                     TransitionSequence left, middle, right;
292 
293                     left.first = seq.first;
294                     left.last = range.first - 1;
295                     left.transition = seq.transition;
296 
297                     middle = range;
298                     middle.transition = seq.transition;
299 
300                     right.first = range.last + 1;
301                     right.last = seq.last;
302                     right.transition = seq.transition;
303 
304                     sequences.remove(i);
305                     if (left.last >= left.first) {
306                         sequences.insert(i, left);
307                         ++i;
308                     }
309                     sequences.insert(i, middle);
310                     ++i;
311                     if (right.last >= right.first) {
312                         sequences.insert(i, right);
313                         ++i;
314                     }
315 
316                     i = -1;
317                     break;
318                 }
319 
320             ++i;
321         }
322     }
323 
324     //qDebug() << "sequence count" << sequences.count();
325     //qDebug() << sequences;
326 
327     if (sequences.count() < 10
328         || sequences.last().last == maxInput
329         || needsCharFunction) {
330         foreach (TransitionSequence seq, sequences) {
331             const bool embedFinalState = isSingleReferencedFinalState(seq.transition);
332 
333             QString brace;
334             if (embedFinalState)
335                 brace = " {";
336 
337             if (!seq.testFunction.isEmpty()) {
338                 body << "if (" << seq.testFunction << ")" << brace;
339             } else if (seq.first == seq.last) {
340                 body << "if (ch.unicode() == " << seq.first << ")" << brace;
341             } else {
342                 if (seq.last < maxInput)
343                     body << "if (ch.unicode() >= " << seq.first
344                          << " && ch.unicode() <= " << seq.last << ")" << brace;
345                 else
346                     body << "if (ch.unicode() >= " << seq.first << ")" << brace;
347             }
348             body.indent();
349             if (embedFinalState) {
350                 body << "token = " << dfa.at(seq.transition).symbol << ";";
351                 body << "goto found;";
352 
353                 body.outdent();
354                 body << "}";
355             } else {
356                 body << "goto state_" << seq.transition << ";";
357                 body.outdent();
358             }
359         }
360     } else {
361         QList<InputType> keys = transitions.keys();
362         std::sort(keys.begin(), keys.end());
363 
364         body << "switch (ch.unicode()) {";
365         body.indent();
366         for (int k = 0; k < keys.count(); ++k) {
367             const InputType key = keys.at(k);
368             const int trans = transitions.value(key);
369 
370             QString keyStr;
371             if (key == '\\')
372                 keyStr = QString("\'\\\\\'");
373             else if (key >= 48 && key < 127)
374                 keyStr = QString('\'') + QChar::fromLatin1(char(key)) + QChar('\'');
375             else
376                 keyStr = QString::number(key);
377 
378             if (k < keys.count() - 1
379                 && transitions.value(keys.at(k + 1)) == trans) {
380                 body << "case " << keyStr << ":";
381             } else {
382                 if (isSingleReferencedFinalState(trans)) {
383                     body << "case " << keyStr << ": token = " << dfa.at(trans).symbol << "; goto found;";
384                 } else {
385                     body << "case " << keyStr << ": goto state_" << trans << ";";
386                 }
387             }
388         }
389         body.outdent();
390         body << "}";
391     }
392 }
393 
generate()394 QString Generator::generate()
395 {
396     Class klass(cfg.className);
397 
398     klass.addMember(Class::PublicMember, "QString input");
399     klass.addMember(Class::PublicMember, "int pos");
400     klass.addMember(Class::PublicMember, "int lexemStart");
401     klass.addMember(Class::PublicMember, "int lexemLength");
402 
403     {
404         CodeBlock body;
405         body << "input = inp;";
406         body << "pos = 0;";
407         body << "lexemStart = 0;";
408         body << "lexemLength = 0;";
409         klass.addConstructor(Class::PublicMember, body, "const QString &inp");
410     }
411 
412     {
413         Function next("QChar", "next()");
414         next.setInline(true);
415         if (cfg.caseSensitivity == Qt::CaseSensitive)
416             next.addBody("return (pos < input.length()) ? input.at(pos++) : QChar();");
417         else
418             next.addBody("return (pos < input.length()) ? input.at(pos++).toLower() : QChar();");
419         klass.addMember(Class::PublicMember, next);
420     }
421 
422     /*
423     {
424         Function lexem("QString", "lexem()");
425         lexem.setConst(true);
426         lexem.setInline(true);
427         lexem.addBody("return input.mid(lexemStart, lexemLength);");
428         klass.addMember(Class::PublicMember, lexem);
429     }
430     */
431 
432     for (int i = 0; i < dfa.count(); ++i)
433         if (dfa.at(i).symbol.endsWith(QLatin1String("()"))) {
434             Function handlerFunc("int", dfa.at(i).symbol);
435             klass.addMember(Class::PublicMember, handlerFunc);
436         }
437 
438     Function lexFunc;
439     lexFunc.setReturnType("int");
440     lexFunc.setName("lex()");
441 
442     CodeBlock body;
443     body << "lexemStart = pos;";
444     body << "lexemLength = 0;";
445     body << "int lastAcceptingPos = -1;";
446     body << "int token = -1;";
447     body << "QChar ch;";
448     body.addNewLine();
449 
450     backReferenceMap.clear();
451     foreach (State s, dfa)
452         foreach (int state, s.transitions)
453             backReferenceMap[state]++;
454 
455     bool haveSingleReferencedFinalState = false;
456 
457     for (int i = 0; i < dfa.count(); ++i) {
458         if (isSingleReferencedFinalState(i)) {
459             haveSingleReferencedFinalState = true;
460             continue;
461         }
462 
463         if (i > 0)
464             body << "state_" << i << ":";
465         else
466             body << "// initial state";
467 
468         body.indent();
469 
470         if (!dfa.at(i).symbol.isEmpty()) {
471             body << "lastAcceptingPos = pos;";
472             body << "token = " << dfa.at(i).symbol << ";";
473         }
474 
475         body.outdent();
476 
477         body.indent();
478 
479         if (!dfa.at(i).transitions.isEmpty()) {
480             body << "ch = next();";
481             generateTransitions(body, dfa.at(i).transitions);
482         }
483 
484         body << "goto out;";
485 
486         body.outdent();
487     }
488 
489     if (haveSingleReferencedFinalState) {
490         body << "found:";
491         body << "lastAcceptingPos = pos;";
492         body.addNewLine();
493     }
494 
495     body << "out:";
496     body << "if (lastAcceptingPos != -1) {";
497     body.indent();
498     body << "lexemLength = lastAcceptingPos - lexemStart;";
499     body << "pos = lastAcceptingPos;";
500     body.outdent();
501     body << "}";
502     body << "return token;";
503 
504     lexFunc.addBody(body);
505 
506     klass.addMember(Class::PublicMember, lexFunc);
507 
508     QString header;
509     if (!headerFileName.isEmpty()) {
510         QString self(QDir::fromNativeSeparators(QStringLiteral(__FILE__)));
511         int lastSep = self.lastIndexOf(QChar('/'));
512         QDir here(lastSep < 0 ? QStringLiteral(".") : self.left(lastSep));
513         QFile headerFile(QDir::cleanPath(here.filePath(headerFileName)));
514         if (headerFile.exists() && headerFile.open(QIODevice::ReadOnly))
515             header = QString::fromUtf8(headerFile.readAll());
516     }
517 
518     header += QLatin1String("// auto generated by qtbase/util/lexgen/. DO NOT EDIT.\n");
519 
520     return header + klass.declaration() + klass.definition();
521 }
522 
523