1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the utils of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:GPL-EXCEPT$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
21 ** included in the packaging of this file. Please review the following
22 ** information to ensure the GNU General Public License requirements will
23 ** be met: https://www.gnu.org/licenses/gpl-3.0.html.
24 **
25 ** $QT_END_LICENSE$
26 **
27 ****************************************************************************/
28
29 #include "generator.h"
30
31 #include <QFile>
32 #include <QDir>
33
printDeclaration(CodeBlock & block,const QString & funcNamePrefix) const34 void Function::printDeclaration(CodeBlock &block, const QString &funcNamePrefix) const
35 {
36 block << (iline ? "inline " : "") << signature(funcNamePrefix) << (iline ? QLatin1String(" {") : QLatin1String(";"));
37 if (!iline)
38 return;
39
40 block.indent();
41 QString tmp = body;
42 if (tmp.endsWith(QLatin1Char('\n')))
43 tmp.chop(1);
44 foreach (QString line, tmp.split(QLatin1Char('\n')))
45 block << line;
46 block.outdent();
47 block << "}";
48 }
49
// Builds "<return type> <prefix><name>[ const]". The return type and its
// separating space are left out when no return type is set.
QString Function::signature(const QString &funcNamePrefix) const
{
    QString result;
    if (!rtype.isEmpty())
        result = rtype + QLatin1Char(' ');

    result.append(funcNamePrefix);
    result.append(fname);

    if (cnst)
        result += " const";
    return result;
}
63
definition() const64 QString Function::definition() const
65 {
66 if (iline)
67 return QString();
68
69 QString result;
70 result += signature();
71 result += QLatin1String("\n{\n");
72
73 QString tmp = body;
74
75 if (tmp.endsWith(QLatin1Char('\n')))
76 tmp.chop(1);
77 if (!tmp.startsWith(QLatin1Char('\n')))
78 tmp.prepend(" ");
79
80 tmp.replace(QLatin1Char('\n'), QLatin1String("\n "));
81
82 result += tmp;
83
84 result += QLatin1String("\n}\n");
85
86 return result;
87 }
88
printDeclaration(const Class * klass,CodeBlock & block) const89 void Class::Section::printDeclaration(const Class *klass, CodeBlock &block) const
90 {
91 foreach (Function ctor, constructors)
92 ctor.printDeclaration(block, klass->name());
93
94 if (!constructors.isEmpty())
95 block.addNewLine();
96
97 foreach (Function func, functions)
98 func.printDeclaration(block);
99
100 if (!functions.isEmpty())
101 block.addNewLine();
102
103 foreach (QString var, variables)
104 block << var << ';';
105 }
106
addConstructor(Access access,const QString & body,const QString & _args)107 void Class::addConstructor(Access access, const QString &body, const QString &_args)
108 {
109 Function ctor;
110 QString args = _args;
111 if (!args.startsWith(QLatin1Char('('))
112 && !args.endsWith(QLatin1Char(')'))) {
113 args.prepend('(');
114 args.append(')');
115 }
116 ctor.setName(args);
117 ctor.addBody(body);
118 sections[access].constructors.append(ctor);
119 }
120
definition(const Class * klass) const121 QString Class::Section::definition(const Class *klass) const
122 {
123 QString result;
124
125 foreach (Function ctor, constructors) {
126 ctor.setName(klass->name() + "::" + klass->name() + ctor.name());
127 result += ctor.definition();
128 result += QLatin1Char('\n');
129 }
130
131 foreach (Function func, functions) {
132 if (!func.hasBody()) continue;
133 func.setName(klass->name() + "::" + func.name());
134 result += func.definition();
135 result += QLatin1Char('\n');
136 }
137
138 return result;
139 }
140
declaration() const141 QString Class::declaration() const
142 {
143 CodeBlock block;
144
145 block << QLatin1String("class ") << cname;
146 block << "{";
147
148 if (!sections[PublicMember].isEmpty()) {
149 block << "public:";
150 block.indent();
151 sections[PublicMember].printDeclaration(this, block);
152 block.outdent();
153 }
154
155 if (!sections[ProtectedMember].isEmpty()) {
156 block << "protected:";
157 block.indent();
158 sections[ProtectedMember].printDeclaration(this, block);
159 block.outdent();
160 }
161
162 if (!sections[PrivateMember].isEmpty()) {
163 block << "private:";
164 block.indent();
165 sections[PrivateMember].printDeclaration(this, block);
166 block.outdent();
167 }
168
169 block << "};";
170 block.addNewLine();
171
172 return block.toString();
173 }
174
definition() const175 QString Class::definition() const
176 {
177 return sections[PrivateMember].definition(this)
178 + sections[ProtectedMember].definition(this)
179 + sections[PublicMember].definition(this);
180 }
181
Generator(const DFA & _dfa,const Config & config)182 Generator::Generator(const DFA &_dfa, const Config &config)
183 : dfa(_dfa), cfg(config)
184 {
185 QList<InputType> lst = cfg.maxInputSet.toList();
186 std::sort(lst.begin(), lst.end());
187 minInput = lst.first();
188 maxInput = lst.last();
189
190 ConfigFile::Section section = config.configSections.value("Code Generator Options");
191
192 foreach (ConfigFile::Entry entry, section) {
193 if (!entry.key.startsWith(QLatin1String("MapToCode["))
194 || !entry.key.endsWith(QLatin1Char(']')))
195 continue;
196 QString range = entry.key;
197 range.remove(0, qstrlen("MapToCode["));
198 range.chop(1);
199 if (range.length() != 3
200 || range.at(1) != QLatin1Char('-')) {
201 qWarning("Invalid range for char mapping function: %s", qPrintable(range));
202 continue;
203 }
204 TransitionSequence seq;
205 seq.first = range.at(0).unicode();
206 seq.last = range.at(2).unicode();
207 seq.testFunction = entry.value;
208 charFunctionRanges.append(seq);
209 }
210
211 QString tokenPrefix = section.value("TokenPrefix");
212 if (!tokenPrefix.isEmpty()) {
213 for (int i = 0; i < dfa.count(); ++i)
214 if (!dfa.at(i).symbol.isEmpty()
215 && !dfa.at(i).symbol.endsWith(QLatin1String("()")))
216 dfa[i].symbol.prepend(tokenPrefix);
217 }
218
219 headerFileName = section.value("FileHeader");
220 }
221
// True when right is the direct successor of left.
static inline bool adjacentKeys(int left, int right)
{
    return right == left + 1;
}
223 //static inline bool adjacentKeys(const InputType &left, const InputType &right)
224 //{ return left.val + 1 == right.val; }
225
convertToSequences(const TransitionMap & transitions)226 static QVector<Generator::TransitionSequence> convertToSequences(const TransitionMap &transitions)
227 {
228 QVector<Generator::TransitionSequence> sequences;
229 if (transitions.isEmpty())
230 return sequences;
231
232 QList<InputType> keys = transitions.keys();
233 std::sort(keys.begin(), keys.end());
234 int i = 0;
235 Generator::TransitionSequence sequence;
236 sequence.first = keys.at(0);
237 ++i;
238 for (; i < keys.count(); ++i) {
239 if (adjacentKeys(keys.at(i - 1), keys.at(i))
240 && transitions.value(keys.at(i)) == transitions.value(keys.at(i - 1))) {
241 continue;
242 }
243 sequence.last = keys.at(i - 1);
244 sequence.transition = transitions.value(sequence.last);
245 sequences.append(sequence);
246
247 sequence.first = keys.at(i);
248 }
249 sequence.last = keys.at(i - 1);
250 sequence.transition = transitions.value(sequence.last);
251 sequences.append(sequence);
252
253 return sequences;
254 }
255
operator <<(QDebug & debug,const Generator::TransitionSequence & seq)256 QDebug &operator<<(QDebug &debug, const Generator::TransitionSequence &seq)
257 {
258 return debug << "[first:" << seq.first << "; last:" << seq.last << "; transition:" << seq.transition
259 << (seq.testFunction.isEmpty() ? QString() : QString(QString("; testfunction:" + seq.testFunction)))
260 << "]";
261 }
262
isSingleReferencedFinalState(int i) const263 bool Generator::isSingleReferencedFinalState(int i) const
264 {
265 return backReferenceMap.value(i) == 1
266 && dfa.at(i).transitions.isEmpty()
267 && !dfa.at(i).symbol.isEmpty();
268 }
269
generateTransitions(CodeBlock & body,const TransitionMap & transitions)270 void Generator::generateTransitions(CodeBlock &body, const TransitionMap &transitions)
271 {
272 if (transitions.isEmpty())
273 return;
274
275 QVector<TransitionSequence> sequences = convertToSequences(transitions);
276
277 bool needsCharFunction = false;
278 if (!charFunctionRanges.isEmpty()) {
279 int i = 0;
280 while (i < sequences.count()) {
281 const TransitionSequence &seq = sequences.at(i);
282 if (!seq.testFunction.isEmpty()) {
283 ++i;
284 continue;
285 }
286
287 foreach (TransitionSequence range, charFunctionRanges)
288 if (range.first >= seq.first && range.last <= seq.last) {
289 needsCharFunction = true;
290
291 TransitionSequence left, middle, right;
292
293 left.first = seq.first;
294 left.last = range.first - 1;
295 left.transition = seq.transition;
296
297 middle = range;
298 middle.transition = seq.transition;
299
300 right.first = range.last + 1;
301 right.last = seq.last;
302 right.transition = seq.transition;
303
304 sequences.remove(i);
305 if (left.last >= left.first) {
306 sequences.insert(i, left);
307 ++i;
308 }
309 sequences.insert(i, middle);
310 ++i;
311 if (right.last >= right.first) {
312 sequences.insert(i, right);
313 ++i;
314 }
315
316 i = -1;
317 break;
318 }
319
320 ++i;
321 }
322 }
323
324 //qDebug() << "sequence count" << sequences.count();
325 //qDebug() << sequences;
326
327 if (sequences.count() < 10
328 || sequences.last().last == maxInput
329 || needsCharFunction) {
330 foreach (TransitionSequence seq, sequences) {
331 const bool embedFinalState = isSingleReferencedFinalState(seq.transition);
332
333 QString brace;
334 if (embedFinalState)
335 brace = " {";
336
337 if (!seq.testFunction.isEmpty()) {
338 body << "if (" << seq.testFunction << ")" << brace;
339 } else if (seq.first == seq.last) {
340 body << "if (ch.unicode() == " << seq.first << ")" << brace;
341 } else {
342 if (seq.last < maxInput)
343 body << "if (ch.unicode() >= " << seq.first
344 << " && ch.unicode() <= " << seq.last << ")" << brace;
345 else
346 body << "if (ch.unicode() >= " << seq.first << ")" << brace;
347 }
348 body.indent();
349 if (embedFinalState) {
350 body << "token = " << dfa.at(seq.transition).symbol << ";";
351 body << "goto found;";
352
353 body.outdent();
354 body << "}";
355 } else {
356 body << "goto state_" << seq.transition << ";";
357 body.outdent();
358 }
359 }
360 } else {
361 QList<InputType> keys = transitions.keys();
362 std::sort(keys.begin(), keys.end());
363
364 body << "switch (ch.unicode()) {";
365 body.indent();
366 for (int k = 0; k < keys.count(); ++k) {
367 const InputType key = keys.at(k);
368 const int trans = transitions.value(key);
369
370 QString keyStr;
371 if (key == '\\')
372 keyStr = QString("\'\\\\\'");
373 else if (key >= 48 && key < 127)
374 keyStr = QString('\'') + QChar::fromLatin1(char(key)) + QChar('\'');
375 else
376 keyStr = QString::number(key);
377
378 if (k < keys.count() - 1
379 && transitions.value(keys.at(k + 1)) == trans) {
380 body << "case " << keyStr << ":";
381 } else {
382 if (isSingleReferencedFinalState(trans)) {
383 body << "case " << keyStr << ": token = " << dfa.at(trans).symbol << "; goto found;";
384 } else {
385 body << "case " << keyStr << ": goto state_" << trans << ";";
386 }
387 }
388 }
389 body.outdent();
390 body << "}";
391 }
392 }
393
generate()394 QString Generator::generate()
395 {
396 Class klass(cfg.className);
397
398 klass.addMember(Class::PublicMember, "QString input");
399 klass.addMember(Class::PublicMember, "int pos");
400 klass.addMember(Class::PublicMember, "int lexemStart");
401 klass.addMember(Class::PublicMember, "int lexemLength");
402
403 {
404 CodeBlock body;
405 body << "input = inp;";
406 body << "pos = 0;";
407 body << "lexemStart = 0;";
408 body << "lexemLength = 0;";
409 klass.addConstructor(Class::PublicMember, body, "const QString &inp");
410 }
411
412 {
413 Function next("QChar", "next()");
414 next.setInline(true);
415 if (cfg.caseSensitivity == Qt::CaseSensitive)
416 next.addBody("return (pos < input.length()) ? input.at(pos++) : QChar();");
417 else
418 next.addBody("return (pos < input.length()) ? input.at(pos++).toLower() : QChar();");
419 klass.addMember(Class::PublicMember, next);
420 }
421
422 /*
423 {
424 Function lexem("QString", "lexem()");
425 lexem.setConst(true);
426 lexem.setInline(true);
427 lexem.addBody("return input.mid(lexemStart, lexemLength);");
428 klass.addMember(Class::PublicMember, lexem);
429 }
430 */
431
432 for (int i = 0; i < dfa.count(); ++i)
433 if (dfa.at(i).symbol.endsWith(QLatin1String("()"))) {
434 Function handlerFunc("int", dfa.at(i).symbol);
435 klass.addMember(Class::PublicMember, handlerFunc);
436 }
437
438 Function lexFunc;
439 lexFunc.setReturnType("int");
440 lexFunc.setName("lex()");
441
442 CodeBlock body;
443 body << "lexemStart = pos;";
444 body << "lexemLength = 0;";
445 body << "int lastAcceptingPos = -1;";
446 body << "int token = -1;";
447 body << "QChar ch;";
448 body.addNewLine();
449
450 backReferenceMap.clear();
451 foreach (State s, dfa)
452 foreach (int state, s.transitions)
453 backReferenceMap[state]++;
454
455 bool haveSingleReferencedFinalState = false;
456
457 for (int i = 0; i < dfa.count(); ++i) {
458 if (isSingleReferencedFinalState(i)) {
459 haveSingleReferencedFinalState = true;
460 continue;
461 }
462
463 if (i > 0)
464 body << "state_" << i << ":";
465 else
466 body << "// initial state";
467
468 body.indent();
469
470 if (!dfa.at(i).symbol.isEmpty()) {
471 body << "lastAcceptingPos = pos;";
472 body << "token = " << dfa.at(i).symbol << ";";
473 }
474
475 body.outdent();
476
477 body.indent();
478
479 if (!dfa.at(i).transitions.isEmpty()) {
480 body << "ch = next();";
481 generateTransitions(body, dfa.at(i).transitions);
482 }
483
484 body << "goto out;";
485
486 body.outdent();
487 }
488
489 if (haveSingleReferencedFinalState) {
490 body << "found:";
491 body << "lastAcceptingPos = pos;";
492 body.addNewLine();
493 }
494
495 body << "out:";
496 body << "if (lastAcceptingPos != -1) {";
497 body.indent();
498 body << "lexemLength = lastAcceptingPos - lexemStart;";
499 body << "pos = lastAcceptingPos;";
500 body.outdent();
501 body << "}";
502 body << "return token;";
503
504 lexFunc.addBody(body);
505
506 klass.addMember(Class::PublicMember, lexFunc);
507
508 QString header;
509 if (!headerFileName.isEmpty()) {
510 QString self(QDir::fromNativeSeparators(QStringLiteral(__FILE__)));
511 int lastSep = self.lastIndexOf(QChar('/'));
512 QDir here(lastSep < 0 ? QStringLiteral(".") : self.left(lastSep));
513 QFile headerFile(QDir::cleanPath(here.filePath(headerFileName)));
514 if (headerFile.exists() && headerFile.open(QIODevice::ReadOnly))
515 header = QString::fromUtf8(headerFile.readAll());
516 }
517
518 header += QLatin1String("// auto generated by qtbase/util/lexgen/. DO NOT EDIT.\n");
519
520 return header + klass.declaration() + klass.definition();
521 }
522
523