1 /*
2 SPDX-License-Identifier: GPL-2.0-or-later
3 SPDX-FileCopyrightText: 2006-2020 Umbrello UML Modeller Authors <umbrello-devel@kde.org>
4 */
5
6 // own header
7 #include "pythonimport.h"
8
9 // app includes
10 #include "attribute.h"
11 #include "classifier.h"
12 #include "codeimpthread.h"
13 #include "debug_utils.h"
14 #include "enum.h"
15 #include "import_utils.h"
16 #include "operation.h"
17 #include "package.h"
18 #include "uml.h"
19 #include "umldoc.h"
20 #include "umlpackagelist.h"
21
22 // qt includes
23 #include <QRegExp>
24
25 /**
26 * Constructor.
27 */
PythonImport(CodeImpThread * thread)28 PythonImport::PythonImport(CodeImpThread* thread)
29 : NativeImportBase(QLatin1String("#"), thread)
30 {
31 setMultiLineComment(QLatin1String("\"\"\""), QLatin1String("\"\"\""));
32 initVars();
33 }
34
35 /**
36 * Destructor.
37 */
~PythonImport()38 PythonImport::~PythonImport()
39 {
40 }
41
42 /**
43 * Reimplement operation from NativeImportBase.
44 */
initVars()45 void PythonImport::initVars()
46 {
47 m_srcIndentIndex = 0;
48 m_srcIndent[m_srcIndentIndex] = 0;
49 m_braceWasOpened = false;
50 m_isStatic = false;
51 }
52
53 /**
54 * Reimplement operation from NativeImportBase.
55 * In addition to handling multiline comments, this method transforms
56 * changes in leading indentation into braces (opening brace for increase
57 * in indentation, closing brace for decrease in indentation) in m_source.
58 * Removal of Python's indentation sensitivity simplifies subsequent
59 * processing using Umbrello's native import framework.
60 * @param line the line to preprocess
61 * @return success status of operation
62 */
preprocess(QString & line)63 bool PythonImport::preprocess(QString& line)
64 {
65 if (NativeImportBase::preprocess(line))
66 return true;
67 // Handle single line comment
68 int pos = line.indexOf(m_singleLineCommentIntro);
69 if (pos != -1) {
70 QString cmnt = line.mid(pos);
71 m_source.append(cmnt);
72 m_srcIndex++;
73 if (pos == 0)
74 return true;
75 line = line.left(pos);
76 line.remove(QRegExp(QLatin1String("\\s+$")));
77 }
78 // Transform changes in indentation into braces a la C++/Java/Perl/...
79 pos = line.indexOf(QRegExp(QLatin1String("\\S")));
80 if (pos == -1)
81 return true;
82 bool isContinuation = false;
83 int leadingWhite = line.left(pos).count(QRegExp(QLatin1String("\\s")));
84 if (leadingWhite > m_srcIndent[m_srcIndentIndex]) {
85 if (m_srcIndex == 0) {
86 uError() << "internal error";
87 return true;
88 }
89 if (m_braceWasOpened) {
90 m_srcIndent[++m_srcIndentIndex] = leadingWhite;
91 m_braceWasOpened = false;
92 } else {
93 isContinuation = true;
94 }
95 } else {
96 while (m_srcIndentIndex > 0 && leadingWhite < m_srcIndent[m_srcIndentIndex]) {
97 m_srcIndentIndex--;
98 m_source.append(QLatin1String("}"));
99 m_srcIndex++;
100 }
101 }
102
103 if (m_braceWasOpened && m_srcIndentIndex == 0) {
104 m_source.append(QLatin1String("}"));
105 m_srcIndex++;
106 }
107
108 if (line.endsWith(QLatin1Char(':'))) {
109 line.replace(QRegExp(QLatin1String(":$")), QLatin1String("{"));
110 m_braceWasOpened = true;
111 } else {
112 m_braceWasOpened = false;
113 }
114 if (!isContinuation && !m_braceWasOpened)
115 line += QLatin1Char(';');
116 return false; // The input was not completely consumed by preprocessing.
117 }
118
119 /**
120 * Implement abstract operation from NativeImportBase.
121 * @param word whitespace delimited item
122 */
fillSource(const QString & word)123 void PythonImport::fillSource(const QString& word)
124 {
125 QString lexeme;
126 const uint len = word.length();
127 for (uint i = 0; i < len; ++i) {
128 const QChar& c = word[i];
129 if (c.isLetterOrNumber() || c == QLatin1Char('_') || c == QLatin1Char('.')) {
130 lexeme += c;
131 } else {
132 if (!lexeme.isEmpty()) {
133 m_source.append(lexeme);
134 m_srcIndex++;
135 lexeme.clear();
136 }
137 m_source.append(QString(c));
138 m_srcIndex++;
139 }
140 }
141 if (!lexeme.isEmpty()) {
142 m_source.append(lexeme);
143 m_srcIndex++;
144 }
145 }
146
147 /**
148 * Return an amount of spaces that corresponds to @param level
149 * @return spaces of indentation
150 */
indentation(int level)151 QString PythonImport::indentation(int level)
152 {
153 QString spaces;
154 for (int i = 0; i < level; ++i) {
155 spaces += QLatin1String(" ");
156 }
157 return spaces;
158 }
159
160 /**
161 * Skip ahead to outermost closing brace.
162 * @return body contents skipped
163 */
skipBody()164 QString PythonImport::skipBody()
165 {
166 /* During input preprocessing, changes in indentation were replaced by
167 braces, and a semicolon was appended to each line ending.
168 In order to return the body, we try to reconstruct the original Python
169 syntax by reverting those changes.
170 */
171 QString body;
172 if (m_source[m_srcIndex] != QLatin1String("{"))
173 skipStmt(QLatin1String("{"));
174 bool firstTokenAfterNewline = true;
175 int braceNesting = 0;
176 QString token;
177 while (!(token = advance()).isNull()) {
178 if (token == QLatin1String("}")) {
179 if (braceNesting <= 0)
180 break;
181 braceNesting--;
182 body += QLatin1Char('\n');
183 firstTokenAfterNewline = true;
184 } else if (token == QLatin1String("{")) {
185 braceNesting++;
186 body += QLatin1String(":\n");
187 firstTokenAfterNewline = true;
188 } else if (token == QLatin1String(";")) {
189 body += QLatin1Char('\n');
190 firstTokenAfterNewline = true;
191 } else {
192 if (firstTokenAfterNewline) {
193 body += indentation(braceNesting);
194 firstTokenAfterNewline = false;
195 } else if (body.contains(QRegExp(QLatin1String("\\w$"))) &&
196 token.contains(QRegExp(QLatin1String("^\\w")))) {
197 body += QLatin1Char(' ');
198 }
199 body += token;
200 }
201 }
202 return body;
203 }
204
205 /**
206 * Parses a python initializer
207 * @param _keyword current string from parser
208 * @param type returns type of assignment
209 * @param value returns assignment value
210 * @return success status of parsing
211 */
parseInitializer(const QString & _keyword,QString & type,QString & value)212 bool PythonImport::parseInitializer(const QString &_keyword, QString &type, QString &value)
213 {
214 QString keyword = _keyword;
215 if (_keyword == QLatin1String("-"))
216 keyword.append(advance());
217
218 if (keyword == QLatin1String("[")) {
219 type = QLatin1String("list");
220 int index = m_srcIndex;
221 skipToClosing(QLatin1Char('['));
222 for (int i = index; i <= m_srcIndex; i++)
223 value += m_source[i];
224 } else if (keyword == QLatin1String("{")) {
225 type = QLatin1String("dict");
226 int index = m_srcIndex;
227 skipToClosing(QLatin1Char('{'));
228 for (int i = index; i <= m_srcIndex; i++)
229 value += m_source[i];
230 } else if (keyword == QLatin1String("(")) {
231 type = QLatin1String("tuple");
232 int index = m_srcIndex;
233 skipToClosing(QLatin1Char('('));
234 for (int i = index; i <= m_srcIndex; i++)
235 value += m_source[i];
236 } else if (keyword.startsWith(QLatin1String("\""))) {
237 type = QLatin1String("string");
238 value = keyword;
239 } else if (keyword == QLatin1String("True") || keyword == QLatin1String("False")) {
240 type = QLatin1String("bool");
241 value = keyword;
242 } else if (keyword.contains(QRegExp(QLatin1String("-?\\d+\\.\\d*")))) {
243 type = QLatin1String("float");
244 value = keyword;
245 } else if (keyword.contains(QRegExp(QLatin1String("-?\\d+")))) {
246 type = QLatin1String("int");
247 value = keyword;
248 } else if (keyword.toLower() == QLatin1String("none")) {
249 type = QLatin1String("object");
250 value = keyword;
251 } else if (!keyword.isEmpty()) {
252 if (lookAhead() == QLatin1String("(")) {
253 advance();
254 type = keyword;
255 int index = m_srcIndex;
256 skipToClosing(QLatin1Char('('));
257 for (int i = index; i <= m_srcIndex; i++)
258 value += m_source[i];
259 } else
260 type = QLatin1String("object");
261 } else
262 type = QLatin1String("object");
263 return true;
264 }
265
266 /**
267 * Parse assignments in the form \<identifier\> '=' \<value\>
268 * Instance variables are identified by a prefixed 'self.'.
269 * @param keyword current string from parser
270 * @return success status of parsing
271 */
parseAssignmentStmt(const QString & keyword)272 bool PythonImport::parseAssignmentStmt(const QString &keyword)
273 {
274 QString variableName = keyword;
275
276 bool isStatic = true;
277 if (variableName.startsWith(QLatin1String("self."))) {
278 variableName.remove(0,5);
279 isStatic = false;
280 }
281 Uml::Visibility::Enum visibility = Uml::Visibility::Public;
282 if (variableName.startsWith(QLatin1String("__"))) {
283 visibility = Uml::Visibility::Private;
284 variableName.remove(0, 2);
285 } else if (variableName.startsWith(QLatin1String("_"))) {
286 visibility = Uml::Visibility::Protected;
287 variableName.remove(0, 1);
288 }
289
290 QString type;
291 QString initialValue;
292 if (advance() == QLatin1String("=")) {
293
294 if (!parseInitializer(advance(), type, initialValue))
295 return false;
296 }
297
298 UMLObject* o = Import_Utils::insertAttribute(m_klass, visibility, variableName,
299 type, m_comment, false);
300 UMLAttribute* a = o->asUMLAttribute();
301 a->setInitialValue(initialValue);
302 a->setStatic(isStatic);
303 return true;
304 }
305
306 /**
307 * Parses method parameter list
308 * @param op UMLOperation instance to add parameter
309 * @return success status of parsing
310 */
parseMethodParameters(UMLOperation * op)311 bool PythonImport::parseMethodParameters(UMLOperation *op)
312 {
313 bool firstParam = true;
314 UMLAttribute *attr = nullptr;
315 while (m_srcIndex < m_source.count() && advance() != QLatin1String(")")) {
316 const QString& parName = m_source[m_srcIndex];
317 if (attr && parName == QLatin1String("=")) {
318 QString type, value;
319 parseInitializer(advance(), type, value);
320 attr->setInitialValue(value);
321 attr->setTypeName(type);
322 } else {
323 if (firstParam) {
324 if (parName.compare(QLatin1String("self"), Qt::CaseInsensitive) != 0) {
325 m_isStatic = true;
326 attr = Import_Utils::addMethodParameter(op, QLatin1String("string"), parName);
327 }
328 firstParam = false;
329 } else {
330 attr = Import_Utils::addMethodParameter(op, QLatin1String("string"), parName);
331 }
332 }
333 if (lookAhead() == QLatin1String(","))
334 advance();
335 }
336 return true;
337 }
338
339 /**
340 * Implement abstract operation from NativeImportBase.
341 * @return success status of operation
342 */
parseStmt()343 bool PythonImport::parseStmt()
344 {
345 const int srcLength = m_source.count();
346 QString keyword = m_source[m_srcIndex];
347 if (keyword == QLatin1String("class")) {
348 const QString& name = advance();
349 UMLObject *ns = Import_Utils::createUMLObject(UMLObject::ot_Class, name,
350 currentScope(), m_comment);
351 pushScope(m_klass = ns->asUMLClassifier());
352 m_comment.clear();
353 if (advance() == QLatin1String("(")) {
354 while (m_srcIndex < srcLength - 1 && advance() != QLatin1String(")")) {
355 const QString& baseName = m_source[m_srcIndex];
356 Import_Utils::createGeneralization(m_klass, baseName);
357 if (advance() != QLatin1String(","))
358 break;
359 }
360 }
361 if (m_source[m_srcIndex] != QLatin1String("{")) {
362 skipStmt(QLatin1String("{"));
363 }
364 log(QLatin1String("class ") + name);
365 return true;
366 }
367 if (keyword == QLatin1String("@")) {
368 const QString& annotation = m_source[++m_srcIndex];
369 uDebug() << "annotation:" << annotation;
370 if (annotation == QLatin1String("staticmethod"))
371 m_isStatic = true;
372 return true;
373 }
374 if (keyword == QLatin1String("def")) {
375 if (m_klass == 0) {
376 // skip functions outside of a class
377 skipBody();
378 return true;
379 }
380
381 if (!m_klass->hasDoc() && !m_comment.isEmpty()) {
382 m_klass->setDoc(m_comment);
383 m_comment = QString();
384 }
385
386 QString name = advance();
387 bool isConstructor = name == QLatin1String("__init__");
388 Uml::Visibility::Enum visibility = Uml::Visibility::Public;
389 if (!isConstructor) {
390 if (name.startsWith(QLatin1String("__"))) {
391 name = name.mid(2);
392 visibility = Uml::Visibility::Private;
393 } else if (name.startsWith(QLatin1String("_"))) {
394 name = name.mid(1);
395 visibility = Uml::Visibility::Protected;
396 }
397 }
398 UMLOperation *op = Import_Utils::makeOperation(m_klass, name);
399 if (advance() != QLatin1String("(")) {
400 uError() << "importPython def " << name << ": expecting \"(\"";
401 skipBody();
402 return true;
403 }
404 if (!parseMethodParameters(op)) {
405 uError() << "importPython error on parsing method parameter for method " << name;
406 skipBody();
407 return true;
408 }
409
410 Import_Utils::insertMethod(m_klass, op, visibility, QLatin1String("string"),
411 m_isStatic, false /*isAbstract*/, false /*isFriend*/,
412 isConstructor, false, m_comment);
413 m_isStatic = false;
414 int srcIndex = m_srcIndex;
415 op->setSourceCode(skipBody());
416
417 if (!op->hasDoc() && !m_comment.isEmpty()) {
418 op->setDoc(m_comment);
419 m_comment = QString();
420 }
421
422 // parse instance variables from __init__ method
423 if (isConstructor) {
424 int indexSave = m_srcIndex;
425 m_srcIndex = srcIndex;
426 advance();
427 keyword = advance();
428 while (m_srcIndex < indexSave) {
429 if (lookAhead() == QLatin1String("=")) {
430 parseAssignmentStmt(keyword);
431 // skip ; inserted by lexer
432 if (lookAhead() == QLatin1String(";")) {
433 advance();
434 keyword = advance();
435 }
436 } else {
437 skipStmt(QLatin1String(";"));
438 keyword = advance();
439 }
440 }
441 m_srcIndex = indexSave;
442 }
443 log(QLatin1String("def ") + name);
444
445 return true;
446 }
447
448 // parse class variables
449 if (m_klass && lookAhead() == QLatin1String("=")) {
450 bool result = parseAssignmentStmt(keyword);
451 log(QLatin1String("class attribute ") + keyword);
452 return result;
453 }
454
455 if (keyword == QLatin1String("}")) {
456 if (scopeIndex()) {
457 m_klass = popScope()->asUMLClassifier();
458 }
459 else
460 uError() << "parsing: too many }";
461 return true;
462 }
463 return false; // @todo parsing of attributes
464 }
465