1 /*
2     SPDX-License-Identifier: GPL-2.0-or-later
3     SPDX-FileCopyrightText: 2005-2020 Umbrello UML Modeller Authors <umbrello-devel@kde.org>
4 */
5 
6 // own header
7 #include "nativeimportbase.h"
8 
9 // app includes
10 #include "codeimpthread.h"
11 #include "debug_utils.h"
12 #include "import_utils.h"
13 
14 // kde includes
15 #include <KLocalizedString>
16 
17 // qt includes
18 #include <QFile>
19 #include <QRegExp>
20 #include <QTextStream>
21 
22 QStringList NativeImportBase::m_parsedFiles;  // static, see nativeimportbase.h
23 
24 /**
25  * Constructor
26  * @param singleLineCommentIntro  "//" for IDL and Java, "--" for Ada
27  * @param thread                  thread in which the code import runs
28  */
NativeImportBase(const QString & singleLineCommentIntro,CodeImpThread * thread)29 NativeImportBase::NativeImportBase(const QString &singleLineCommentIntro, CodeImpThread* thread)
30   : ClassImport(thread),
31     m_singleLineCommentIntro(singleLineCommentIntro),
32     m_srcIndex(0),
33     m_klass(0),
34     m_currentAccess(Uml::Visibility::Public),
35     m_inComment(false),
36     m_isAbstract(false)
37 {
38 }
39 
40 /**
41  * Destructor.
42  */
~NativeImportBase()43 NativeImportBase::~NativeImportBase()
44 {
45 }
46 
47 /**
48  * Set the delimiter strings for a multi line comment.
49  * @param intro  In languages with a C style multiline comment
50  *               this is slash-star.
51  * @param end    In languages with a C style multiline comment
52  *               this is star-slash.
53  */
setMultiLineComment(const QString & intro,const QString & end)54 void NativeImportBase::setMultiLineComment(const QString &intro, const QString &end)
55 {
56     m_multiLineCommentIntro = intro;
57     m_multiLineCommentEnd = end;
58 }
59 
60 /**
61  * Set the delimiter strings for an alternative form of
62  * multi line comment. See setMultiLineComment().
63  * @param intro   the start comment string
64  * @param end     the end comment string
65  */
setMultiLineAltComment(const QString & intro,const QString & end)66 void NativeImportBase::setMultiLineAltComment(const QString &intro, const QString &end)
67 {
68     m_multiLineAltCommentIntro = intro;
69     m_multiLineAltCommentEnd = end;
70 }
71 
72 /**
73  * Advance m_srcIndex until m_source[m_srcIndex] contains the lexeme
74  * given by `until'.
75  * @param until   the target string
76  */
skipStmt(const QString & until)77 void NativeImportBase::skipStmt(const QString& until /* = ";" */)
78 {
79     const int srcLength = m_source.count();
80     while (m_srcIndex < srcLength && m_source[m_srcIndex] != until)
81         m_srcIndex++;
82 }
83 
84 /**
85  * Advance m_srcIndex to the index of the corresponding closing character
86  * of the given opening.  Nested opening/closing pairs are respected.
87  * Valid openers are:    '{'  '['  '('  '<'
88  * @param  opener   the opener string
89  * @return  True for success, false for misuse (invalid opener) or
90  *          if no matching closing character is found in m_source.
91  */
skipToClosing(QChar opener)92 bool NativeImportBase::skipToClosing(QChar opener)
93 {
94     QString closing;
95     switch (opener.toLatin1()) {
96         case '{':
97             closing = QLatin1String("}");
98             break;
99         case '[':
100             closing = QLatin1String("]");
101             break;
102         case '(':
103             closing = QLatin1String(")");
104             break;
105         case '<':
106             closing = QLatin1String(">");
107             break;
108         default:
109             uError() << "opener='" << opener << "': illegal input character";
110             return false;
111     }
112     const QString opening(opener);
113     skipStmt(opening);
114     const int srcLength = m_source.count();
115     int nesting = 0;
116     while (m_srcIndex < srcLength) {
117         QString nextToken = advance();
118         if (nextToken.isEmpty())
119             break;
120         if (nextToken == closing) {
121             if (nesting <= 0)
122                 break;
123             nesting--;
124         } else if (nextToken == opening) {
125             nesting++;
126         }
127     }
128     if (m_srcIndex == srcLength)
129         return false;
130     return true;
131 }
132 
133 /**
134  * Set package as current scope.
135 
136  * @param p UML package to set as current scope
137  */
pushScope(UMLPackage * p)138 void NativeImportBase::pushScope(UMLPackage *p)
139 {
140     m_scope.append(p);
141 }
142 
143 /**
144  * Return previously defined scope.
145  *
146  * @return previous scope
147  */
popScope()148 UMLPackage *NativeImportBase::popScope()
149 {
150     m_scope.takeLast();
151     UMLPackage *p = m_scope.last();
152     return p;
153 }
154 
155 /**
156  * Return current scope.
157  *
158  * @return scope
159  */
currentScope()160 UMLPackage *NativeImportBase::currentScope()
161 {
162     UMLPackage *p = m_scope.last();
163     return p;
164 }
165 
166 /**
167  * Return current scope index.
168  *
169  * @return >= 0 index, -1 empty
170  */
scopeIndex()171 int NativeImportBase::scopeIndex()
172 {
173     return m_scope.size() - 1;
174 }
175 
176 /**
177  * Get the next lexeme without advancing.
178  * @return  the next lexeme or an empty string
179  */
lookAhead()180 QString NativeImportBase::lookAhead()
181 {
182     if (m_srcIndex < m_source.count() - 1)
183         return m_source[m_srcIndex+1];
184     return QString();
185 }
186 
187 /**
188  * Advance m_srcIndex until m_source[m_srcIndex] contains a non-comment.
189  * Comments encountered during advancement are accumulated in `m_comment'.
190  * If m_srcIndex hits the end of m_source then QString() is returned.
191  * @return the current lexeme or an empty string
192  */
advance()193 QString NativeImportBase::advance()
194 {
195     while (m_srcIndex < m_source.count() - 1) {
196         m_srcIndex++;
197         if (m_source[m_srcIndex].startsWith(m_singleLineCommentIntro))
198             m_comment += m_source[m_srcIndex].mid(m_singleLineCommentIntro.length());
199         else
200             break;
201     }
202     if (m_srcIndex >= m_source.count() - 1 ||
203         // if last item in m_source is a comment then it is dropped too
204         (m_srcIndex == m_source.count() - 1 &&
205          m_source[m_srcIndex].startsWith(m_singleLineCommentIntro))) {
206         return QString();
207     }
208     return m_source[m_srcIndex];
209 }
210 
211 /**
212  * Preprocess a line.
213  * May modify the given line to remove items consumed by the
214  * preprocessing such as comments or preprocessor directives.
215  * The default implementation handles multi-line comments.
216  * @param line  The line to preprocess.
217  * @return      True if the line was completely consumed,
218  *              false if there are still items left in the line
219  *              for further analysis.
220  */
preprocess(QString & line)221 bool NativeImportBase::preprocess(QString& line)
222 {
223     if (line.isEmpty())
224         return true;
225     if (m_multiLineCommentIntro.isEmpty())
226         return false;
227     // Check for end of multi line comment.
228     if (m_inComment) {
229         int delimiterLen = 0;
230         int pos = line.indexOf(m_multiLineCommentEnd);
231         if (pos == -1) {
232             if (! m_multiLineAltCommentEnd.isEmpty())
233                 pos = line.indexOf(m_multiLineAltCommentEnd);
234             if (pos == -1) {
235                 m_comment += line + QLatin1Char('\n');
236                 return true;  // done
237             }
238             delimiterLen = m_multiLineAltCommentEnd.length();
239         } else {
240             delimiterLen = m_multiLineCommentEnd.length();
241         }
242         if (pos > 0) {
243             QString text = line.mid(0, pos - 1);
244             m_comment += text.trimmed();
245         }
246         m_source.append(m_singleLineCommentIntro + m_comment);  // denotes comments in `m_source'
247         m_srcIndex++;
248         m_comment = QString();
249         m_inComment = false;
250         pos += delimiterLen;  // pos now points behind the closed comment
251         if (pos == (int)line.length())
252             return true;  // done
253         line = line.mid(pos);
254     }
255     // If we get here then m_inComment is false.
256     // Check for start of multi line comment.
257     int delimIntroLen = 0;
258     int delimEndLen = 0;
259     int pos = line.indexOf(m_multiLineCommentIntro);
260     if (pos != -1) {
261         delimIntroLen = m_multiLineCommentIntro.length();
262     } else if (!m_multiLineAltCommentIntro.isEmpty()) {
263         pos = line.indexOf(m_multiLineAltCommentIntro);
264         if (pos != -1)
265             delimIntroLen = m_multiLineAltCommentIntro.length();
266     }
267     if (pos != -1) {
268         int sPos = line.indexOf(m_singleLineCommentIntro);
269         if (sPos != -1 && sPos < pos) {
270             // multi line comment intro found in single line comment
271             pos = -1;      // is no multi line comment after all
272         }
273     }
274     if (pos != -1) {
275         int endpos = line.indexOf(m_multiLineCommentEnd, pos + delimIntroLen);
276         if (endpos != -1) {
277             delimEndLen = m_multiLineCommentEnd.length();
278         } else if (!m_multiLineAltCommentEnd.isEmpty()) {
279             endpos = line.indexOf(m_multiLineAltCommentEnd, pos + delimIntroLen);
280             if (endpos != -1)
281                 delimEndLen = m_multiLineAltCommentEnd.length();
282         }
283         if (endpos == -1) {
284             m_inComment = true;
285             if (pos + delimIntroLen < (int)line.length()) {
286                 QString cmnt = line.mid(pos + delimIntroLen);
287                 m_comment += cmnt.trimmed() + QLatin1Char('\n');
288             }
289             if (pos == 0)
290                 return true;  // done
291             line = line.left(pos);
292         } else {   // It's a multiline comment on a single line.
293             if (endpos > pos + delimIntroLen)  {
294                 QString cmnt = line.mid(pos + delimIntroLen, endpos - pos - delimIntroLen);
295                 cmnt = cmnt.trimmed();
296                 if (!cmnt.isEmpty())
297                     m_source.append(m_singleLineCommentIntro + cmnt);
298             }
299             endpos++;  // endpos now points at the slash of "*/"
300             QString pre;
301             if (pos > 0)
302                 pre = line.left(pos);
303             QString post;
304             if (endpos + delimEndLen < (int)line.length())
305                 post = line.mid(endpos + 1);
306             line = pre + post;
307         }
308     }
309     return false;  // The input was not completely consumed by preprocessing.
310 }
311 
312 /**
313  * Split the line so that a string is returned as a single element of the list.
314  * When not in a string then split at white space.
315  * The default implementation is suitable for C style strings and char constants.
316  * @param line   the line to split
317  * @return the parts of the line
318  */
split(const QString & line)319 QStringList NativeImportBase::split(const QString& line)
320 {
321     QStringList list;
322     QString listElement;
323     QChar stringIntro = 0;  // buffers the string introducer character
324     bool seenSpace = false;
325     QString ln = line.trimmed();
326     for (int i = 0; i < ln.length(); ++i) {
327         const QChar& c = ln[i];
328         if (stringIntro.toLatin1()) {        // we are in a string
329             listElement += c;
330             if (c == stringIntro) {
331                 if (ln[i - 1] != QLatin1Char('\\')) {
332                     list.append(listElement);
333                     listElement.clear();
334                     stringIntro = 0;  // we are no longer in a string
335                 }
336             }
337         } else if (c == QLatin1Char('"') || c == QLatin1Char('\'')) {
338             if (!listElement.isEmpty()) {
339                 list.append(listElement);
340             }
341             listElement = stringIntro = c;
342             seenSpace = false;
343         } else if (c == QLatin1Char(' ') || c == QLatin1Char('\t')) {
344             if (seenSpace)
345                 continue;
346             seenSpace = true;
347             if (!listElement.isEmpty()) {
348                 list.append(listElement);
349                 listElement.clear();
350             }
351         } else {
352             listElement += c;
353             seenSpace = false;
354         }
355     }
356     if (!listElement.isEmpty())
357         list.append(listElement);
358     return list;
359 }
360 
361 /**
362  * Scan a single line.
363  * parseFile() calls this for each line read from the input file.
364  * This in turn calls other methods such as preprocess() and fillSource().
365  * The lexer. Tokenizes the given string and fills `m_source'.
366  * Stores possible comments in `m_comment'.
367  * @param line  The line to scan.
368  */
scan(const QString & line)369 void NativeImportBase::scan(const QString& line)
370 {
371     QString ln = line;
372     if (preprocess(ln))
373         return;
374     // Check for single line comment.
375     int pos = ln.indexOf(m_singleLineCommentIntro);
376     if (pos != -1) {
377         QString cmnt = ln.mid(pos);
378         m_source.append(cmnt);
379         if (pos == 0)
380             return;
381         ln = ln.left(pos);
382     }
383     if (ln.contains(QRegExp(QLatin1String("^\\s*$"))))
384         return;
385     const QStringList words = split(ln);
386     for (QStringList::ConstIterator it = words.begin(); it != words.end(); ++it) {
387         QString word = *it;
388         if (word[0] == QLatin1Char('"') || word[0] == QLatin1Char('\''))
389             m_source.append(word);  // string constants are handled by split()
390         else
391             fillSource(word);
392     }
393 }
394 
395 /**
396  * Initialize auxiliary variables.
397  * This is called by the default implementation of parseFile()
398  * after scanning (before parsing the QStringList m_source.)
399  * The default implementation is empty.
400  */
initVars()401 void NativeImportBase::initVars()
402 {
403 }
404 
405 /**
406  * Import a single file.
407  * The default implementation should be feasible for languages that
408  * don't depend on an external preprocessor.
409  * @param filename  The file to import.
410  * @return state of parsing - false means errors
411  */
parseFile(const QString & filename)412 bool NativeImportBase::parseFile(const QString& filename)
413 {
414     QString nameWithoutPath = filename;
415     nameWithoutPath.remove(QRegExp(QLatin1String("^.*/")));
416     if (m_parsedFiles.contains(nameWithoutPath))
417         return true;
418     m_parsedFiles.append(nameWithoutPath);
419     QString fname = filename;
420     const QString msgPrefix = filename + QLatin1String(": ");
421     if (filename.contains(QLatin1Char('/'))) {
422         QString path = filename;
423         path.remove(QRegExp(QLatin1String("/[^/]+$")));
424         uDebug() << msgPrefix << "adding path " << path;
425         Import_Utils::addIncludePath(path);
426     }
427     if (!QFile::exists(filename)) {
428         QFileInfo fi(filename);
429         if (fi.isAbsolute()) {
430             uError() << msgPrefix << "cannot find file";
431             return false;
432         }
433         bool found = false;
434         const QStringList includePaths = Import_Utils::includePathList();
435         for (QStringList::ConstIterator pathIt = includePaths.begin();
436                                    pathIt != includePaths.end(); ++pathIt) {
437             QString path = (*pathIt);
438             if (! path.endsWith(QLatin1Char('/'))) {
439                 path.append(QLatin1Char('/'));
440             }
441             if (QFile::exists(path + filename)) {
442                 fname.prepend(path);
443                 found = true;
444                 break;
445             }
446         }
447         if (! found) {
448             uError() << msgPrefix << "cannot find file";
449             return false;
450         }
451     }
452     QFile file(fname);
453     if (! file.open(QIODevice::ReadOnly)) {
454         uError() << msgPrefix << "cannot open file";
455         return false;
456     }
457     log(nameWithoutPath, QLatin1String("parsing..."));
458     // Scan the input file into the QStringList m_source.
459     m_source.clear();
460     m_srcIndex = 0;
461     initVars();
462     QTextStream stream(&file);
463     int lineCount = 0;
464     while (! stream.atEnd()) {
465         QString line = stream.readLine();
466         lineCount++;
467         scan(line);
468     }
469     log(nameWithoutPath, QLatin1String("file size: ") + QString::number(file.size()) +
470                          QLatin1String(" / lines: ") + QString::number(lineCount));
471     file.close();
472     // Parse the QStringList m_source.
473     m_klass = 0;
474     m_currentAccess = Uml::Visibility::Public;
475     m_scope.clear();
476     pushScope(Import_Utils::globalScope()); // index 0 is reserverd for the global scope
477     const int srcLength = m_source.count();
478     for (m_srcIndex = 0; m_srcIndex < srcLength; ++m_srcIndex) {
479         const QString& firstToken = m_source[m_srcIndex];
480         //uDebug() << '"' << firstToken << '"';
481         if (firstToken.startsWith(m_singleLineCommentIntro)) {
482             m_comment = firstToken.mid(m_singleLineCommentIntro.length());
483             continue;
484         }
485         if (! parseStmt())
486            skipStmt();
487         m_comment.clear();
488     }
489     log(nameWithoutPath, QLatin1String("...end of parse"));
490     return true;
491 }
492 
493 /**
494  * Implement abstract operation from ClassImport.
495  */
initialize()496 void NativeImportBase::initialize()
497 {
498     m_parsedFiles.clear();
499 }
500