1 /*
2     SPDX-License-Identifier: GPL-2.0-or-later
3     SPDX-FileCopyrightText: 2006-2020 Umbrello UML Modeller Authors <umbrello-devel@kde.org>
4 */
5 
6 // own header
7 #include "pythonimport.h"
8 
9 // app includes
10 #include "attribute.h"
11 #include "classifier.h"
12 #include "codeimpthread.h"
13 #include "debug_utils.h"
14 #include "enum.h"
15 #include "import_utils.h"
16 #include "operation.h"
17 #include "package.h"
18 #include "uml.h"
19 #include "umldoc.h"
20 #include "umlpackagelist.h"
21 
22 // qt includes
23 #include <QRegExp>
24 
25 /**
26  * Constructor.
27  */
PythonImport(CodeImpThread * thread)28 PythonImport::PythonImport(CodeImpThread* thread)
29   : NativeImportBase(QLatin1String("#"), thread)
30 {
31     setMultiLineComment(QLatin1String("\"\"\""), QLatin1String("\"\"\""));
32     initVars();
33 }
34 
35 /**
36  * Destructor.
37  */
~PythonImport()38 PythonImport::~PythonImport()
39 {
40 }
41 
42 /**
43  * Reimplement operation from NativeImportBase.
44  */
initVars()45 void PythonImport::initVars()
46 {
47     m_srcIndentIndex = 0;
48     m_srcIndent[m_srcIndentIndex] = 0;
49     m_braceWasOpened = false;
50     m_isStatic = false;
51 }
52 
53 /**
54  * Reimplement operation from NativeImportBase.
55  * In addition to handling multiline comments, this method transforms
56  * changes in leading indentation into braces (opening brace for increase
57  * in indentation, closing brace for decrease in indentation) in m_source.
58  * Removal of Python's indentation sensitivity simplifies subsequent
59  * processing using Umbrello's native import framework.
60  * @param line   the line to preprocess
61  * @return success status of operation
62  */
preprocess(QString & line)63 bool PythonImport::preprocess(QString& line)
64 {
65     if (NativeImportBase::preprocess(line))
66         return true;
67     // Handle single line comment
68     int pos = line.indexOf(m_singleLineCommentIntro);
69     if (pos != -1) {
70         QString cmnt = line.mid(pos);
71         m_source.append(cmnt);
72         m_srcIndex++;
73         if (pos == 0)
74             return true;
75         line = line.left(pos);
76         line.remove(QRegExp(QLatin1String("\\s+$")));
77     }
78     // Transform changes in indentation into braces a la C++/Java/Perl/...
79     pos = line.indexOf(QRegExp(QLatin1String("\\S")));
80     if (pos == -1)
81         return true;
82     bool isContinuation = false;
83     int leadingWhite = line.left(pos).count(QRegExp(QLatin1String("\\s")));
84     if (leadingWhite > m_srcIndent[m_srcIndentIndex]) {
85         if (m_srcIndex == 0) {
86             uError() << "internal error";
87             return true;
88         }
89         if (m_braceWasOpened) {
90             m_srcIndent[++m_srcIndentIndex] = leadingWhite;
91             m_braceWasOpened = false;
92         } else {
93             isContinuation = true;
94         }
95     } else {
96         while (m_srcIndentIndex > 0 && leadingWhite < m_srcIndent[m_srcIndentIndex]) {
97             m_srcIndentIndex--;
98             m_source.append(QLatin1String("}"));
99             m_srcIndex++;
100         }
101     }
102 
103     if (m_braceWasOpened && m_srcIndentIndex == 0) {
104         m_source.append(QLatin1String("}"));
105         m_srcIndex++;
106     }
107 
108     if (line.endsWith(QLatin1Char(':'))) {
109         line.replace(QRegExp(QLatin1String(":$")), QLatin1String("{"));
110         m_braceWasOpened = true;
111     } else {
112         m_braceWasOpened = false;
113     }
114     if (!isContinuation && !m_braceWasOpened)
115         line += QLatin1Char(';');
116     return false;  // The input was not completely consumed by preprocessing.
117 }
118 
119 /**
120  * Implement abstract operation from NativeImportBase.
121  * @param word   whitespace delimited item
122  */
fillSource(const QString & word)123 void PythonImport::fillSource(const QString& word)
124 {
125     QString lexeme;
126     const uint len = word.length();
127     for (uint i = 0; i < len; ++i) {
128         const QChar& c = word[i];
129         if (c.isLetterOrNumber() || c == QLatin1Char('_') || c == QLatin1Char('.')) {
130             lexeme += c;
131         } else {
132             if (!lexeme.isEmpty()) {
133                 m_source.append(lexeme);
134                 m_srcIndex++;
135                 lexeme.clear();
136             }
137             m_source.append(QString(c));
138             m_srcIndex++;
139         }
140     }
141     if (!lexeme.isEmpty()) {
142         m_source.append(lexeme);
143         m_srcIndex++;
144     }
145 }
146 
147 /**
148  * Return an amount of spaces that corresponds to @param level
149  * @return spaces of indentation
150  */
indentation(int level)151 QString PythonImport::indentation(int level)
152 {
153     QString spaces;
154     for (int i = 0; i < level; ++i) {
155         spaces += QLatin1String("  ");
156     }
157     return spaces;
158 }
159 
160 /**
161  * Skip ahead to outermost closing brace.
162  * @return  body contents skipped
163  */
skipBody()164 QString PythonImport::skipBody()
165 {
166     /* During input preprocessing, changes in indentation were replaced by
167        braces, and a semicolon was appended to each line ending.
168        In order to return the body, we try to reconstruct the original Python
169        syntax by reverting those changes.
170      */
171     QString body;
172     if (m_source[m_srcIndex] != QLatin1String("{"))
173         skipStmt(QLatin1String("{"));
174     bool firstTokenAfterNewline = true;
175     int braceNesting = 0;
176     QString token;
177     while (!(token = advance()).isNull()) {
178         if (token == QLatin1String("}")) {
179             if (braceNesting <= 0)
180                 break;
181             braceNesting--;
182             body += QLatin1Char('\n');
183             firstTokenAfterNewline = true;
184         } else if (token == QLatin1String("{")) {
185             braceNesting++;
186             body += QLatin1String(":\n");
187             firstTokenAfterNewline = true;
188         } else if (token == QLatin1String(";")) {
189             body += QLatin1Char('\n');
190             firstTokenAfterNewline = true;
191         } else {
192             if (firstTokenAfterNewline) {
193                 body += indentation(braceNesting);
194                 firstTokenAfterNewline = false;
195             } else if (body.contains(QRegExp(QLatin1String("\\w$"))) &&
196                        token.contains(QRegExp(QLatin1String("^\\w")))) {
197                 body += QLatin1Char(' ');
198             }
199             body += token;
200         }
201     }
202     return body;
203 }
204 
205 /**
206  * Parses a python initializer
207  * @param _keyword current string from parser
208  * @param type returns type of assignment
209  * @param value returns assignment value
210  * @return success status of parsing
211  */
parseInitializer(const QString & _keyword,QString & type,QString & value)212 bool PythonImport::parseInitializer(const QString &_keyword, QString &type, QString &value)
213 {
214     QString keyword = _keyword;
215     if (_keyword == QLatin1String("-"))
216         keyword.append(advance());
217 
218     if (keyword == QLatin1String("[")) {
219         type = QLatin1String("list");
220         int index = m_srcIndex;
221         skipToClosing(QLatin1Char('['));
222         for (int i = index; i <= m_srcIndex; i++)
223             value += m_source[i];
224     } else if (keyword == QLatin1String("{")) {
225         type = QLatin1String("dict");
226         int index = m_srcIndex;
227         skipToClosing(QLatin1Char('{'));
228         for (int i = index; i <= m_srcIndex; i++)
229             value += m_source[i];
230     } else if (keyword == QLatin1String("(")) {
231         type = QLatin1String("tuple");
232         int index = m_srcIndex;
233         skipToClosing(QLatin1Char('('));
234         for (int i = index; i <= m_srcIndex; i++)
235             value += m_source[i];
236     } else if (keyword.startsWith(QLatin1String("\""))) {
237         type = QLatin1String("string");
238         value = keyword;
239     } else if (keyword == QLatin1String("True") || keyword == QLatin1String("False")) {
240         type = QLatin1String("bool");
241         value = keyword;
242     } else if (keyword.contains(QRegExp(QLatin1String("-?\\d+\\.\\d*")))) {
243         type = QLatin1String("float");
244         value = keyword;
245     } else if (keyword.contains(QRegExp(QLatin1String("-?\\d+")))) {
246         type = QLatin1String("int");
247         value = keyword;
248     } else if (keyword.toLower() == QLatin1String("none")) {
249         type = QLatin1String("object");
250         value = keyword;
251     } else if (!keyword.isEmpty()) {
252         if (lookAhead() == QLatin1String("(")) {
253             advance();
254             type = keyword;
255             int index = m_srcIndex;
256             skipToClosing(QLatin1Char('('));
257             for (int i = index; i <= m_srcIndex; i++)
258                 value += m_source[i];
259         } else
260             type = QLatin1String("object");
261     } else
262         type = QLatin1String("object");
263     return true;
264 }
265 
266 /**
267  * Parse assignments in the form \<identifier\> '=' \<value\>
268  * Instance variables are identified by a prefixed 'self.'.
269  * @param keyword current string from parser
270  * @return success status of parsing
271  */
parseAssignmentStmt(const QString & keyword)272 bool PythonImport::parseAssignmentStmt(const QString &keyword)
273 {
274     QString variableName = keyword;
275 
276     bool isStatic = true;
277     if (variableName.startsWith(QLatin1String("self."))) {
278         variableName.remove(0,5);
279         isStatic = false;
280     }
281     Uml::Visibility::Enum visibility = Uml::Visibility::Public;
282     if (variableName.startsWith(QLatin1String("__"))) {
283         visibility = Uml::Visibility::Private;
284         variableName.remove(0, 2);
285     } else if (variableName.startsWith(QLatin1String("_"))) {
286         visibility = Uml::Visibility::Protected;
287         variableName.remove(0, 1);
288     }
289 
290     QString type;
291     QString initialValue;
292     if (advance() == QLatin1String("=")) {
293 
294         if (!parseInitializer(advance(), type, initialValue))
295             return false;
296     }
297 
298     UMLObject* o = Import_Utils::insertAttribute(m_klass, visibility, variableName,
299                                                  type, m_comment, false);
300     UMLAttribute* a = o->asUMLAttribute();
301     a->setInitialValue(initialValue);
302     a->setStatic(isStatic);
303     return true;
304 }
305 
306 /**
307  * Parses method parameter list
308  * @param op UMLOperation instance to add parameter
309  * @return success status of parsing
310  */
parseMethodParameters(UMLOperation * op)311 bool PythonImport::parseMethodParameters(UMLOperation *op)
312 {
313     bool firstParam = true;
314     UMLAttribute *attr = nullptr;
315     while (m_srcIndex < m_source.count() && advance() != QLatin1String(")")) {
316         const QString& parName = m_source[m_srcIndex];
317         if (attr && parName == QLatin1String("=")) {
318             QString type, value;
319             parseInitializer(advance(), type, value);
320             attr->setInitialValue(value);
321             attr->setTypeName(type);
322         } else {
323             if (firstParam) {
324                 if (parName.compare(QLatin1String("self"), Qt::CaseInsensitive) != 0) {
325                     m_isStatic = true;
326                     attr = Import_Utils::addMethodParameter(op, QLatin1String("string"), parName);
327                 }
328                 firstParam = false;
329             } else {
330                 attr = Import_Utils::addMethodParameter(op, QLatin1String("string"), parName);
331             }
332         }
333         if (lookAhead() == QLatin1String(","))
334             advance();
335     }
336     return true;
337 }
338 
339 /**
340  * Implement abstract operation from NativeImportBase.
341  * @return success status of operation
342  */
parseStmt()343 bool PythonImport::parseStmt()
344 {
345     const int srcLength = m_source.count();
346     QString keyword = m_source[m_srcIndex];
347     if (keyword == QLatin1String("class")) {
348         const QString& name = advance();
349         UMLObject *ns = Import_Utils::createUMLObject(UMLObject::ot_Class, name,
350                                                       currentScope(), m_comment);
351         pushScope(m_klass = ns->asUMLClassifier());
352         m_comment.clear();
353         if (advance() == QLatin1String("(")) {
354             while (m_srcIndex < srcLength - 1 && advance() != QLatin1String(")")) {
355                 const QString& baseName = m_source[m_srcIndex];
356                 Import_Utils::createGeneralization(m_klass, baseName);
357                 if (advance() != QLatin1String(","))
358                     break;
359             }
360         }
361         if (m_source[m_srcIndex] != QLatin1String("{")) {
362             skipStmt(QLatin1String("{"));
363         }
364         log(QLatin1String("class ") + name);
365         return true;
366     }
367     if (keyword == QLatin1String("@")) {
368         const QString& annotation = m_source[++m_srcIndex];
369         uDebug() << "annotation:" << annotation;
370         if (annotation == QLatin1String("staticmethod"))
371             m_isStatic = true;
372         return true;
373     }
374     if (keyword == QLatin1String("def")) {
375         if (m_klass == 0) {
376             // skip functions outside of a class
377             skipBody();
378             return true;
379         }
380 
381         if (!m_klass->hasDoc() && !m_comment.isEmpty()) {
382             m_klass->setDoc(m_comment);
383             m_comment = QString();
384         }
385 
386         QString name = advance();
387         bool isConstructor = name == QLatin1String("__init__");
388         Uml::Visibility::Enum visibility = Uml::Visibility::Public;
389         if (!isConstructor) {
390             if (name.startsWith(QLatin1String("__"))) {
391                 name = name.mid(2);
392                 visibility = Uml::Visibility::Private;
393             } else if (name.startsWith(QLatin1String("_"))) {
394                 name = name.mid(1);
395                 visibility = Uml::Visibility::Protected;
396             }
397         }
398         UMLOperation *op = Import_Utils::makeOperation(m_klass, name);
399         if (advance() != QLatin1String("(")) {
400             uError() << "importPython def " << name << ": expecting \"(\"";
401             skipBody();
402             return true;
403         }
404         if (!parseMethodParameters(op)) {
405             uError() << "importPython error on parsing method parameter for method " << name;
406             skipBody();
407             return true;
408         }
409 
410         Import_Utils::insertMethod(m_klass, op, visibility, QLatin1String("string"),
411                                    m_isStatic, false /*isAbstract*/, false /*isFriend*/,
412                                    isConstructor, false, m_comment);
413         m_isStatic = false;
414         int srcIndex = m_srcIndex;
415         op->setSourceCode(skipBody());
416 
417         if (!op->hasDoc() && !m_comment.isEmpty()) {
418             op->setDoc(m_comment);
419             m_comment = QString();
420         }
421 
422         // parse instance variables from __init__ method
423         if (isConstructor) {
424             int indexSave = m_srcIndex;
425             m_srcIndex = srcIndex;
426             advance();
427             keyword = advance();
428             while (m_srcIndex < indexSave) {
429                 if (lookAhead() == QLatin1String("=")) {
430                     parseAssignmentStmt(keyword);
431                     // skip ; inserted by lexer
432                     if (lookAhead() == QLatin1String(";")) {
433                         advance();
434                         keyword = advance();
435                     }
436                 } else {
437                     skipStmt(QLatin1String(";"));
438                     keyword = advance();
439                 }
440             }
441             m_srcIndex = indexSave;
442         }
443         log(QLatin1String("def ") + name);
444 
445         return true;
446     }
447 
448     // parse class variables
449     if (m_klass && lookAhead() == QLatin1String("=")) {
450         bool result = parseAssignmentStmt(keyword);
451         log(QLatin1String("class attribute ") + keyword);
452         return result;
453     }
454 
455     if (keyword == QLatin1String("}")) {
456         if (scopeIndex()) {
457             m_klass = popScope()->asUMLClassifier();
458         }
459         else
460             uError() << "parsing: too many }";
461         return true;
462     }
463     return false;  // @todo parsing of attributes
464 }
465