1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of Qt Creator.
7 **
8 ** Commercial License Usage
9 ** Licensees holding valid commercial Qt licenses may use this file in
10 ** accordance with the commercial license agreement provided with the
11 ** Software or, alternatively, in accordance with the terms contained in
12 ** a written agreement between you and The Qt Company. For licensing terms
13 ** and conditions see https://www.qt.io/terms-conditions. For further
14 ** information use the contact form at https://www.qt.io/contact-us.
15 **
16 ** GNU General Public License Usage
17 ** Alternatively, this file may be used under the terms of the GNU
18 ** General Public License version 3 as published by the Free Software
19 ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
20 ** included in the packaging of this file. Please review the following
21 ** information to ensure the GNU General Public License requirements will
22 ** be met: https://www.gnu.org/licenses/gpl-3.0.html.
23 **
24 ****************************************************************************/
25
26 /*
27 This file is a self-contained interactive indenter for Qt Script.
28
    The general problem of indenting a program is ill posed. On
    the one hand, an indenter has to analyze programs written in a
    free-form formal language that is best described in terms of
    tokens, not characters, not lines. On the other hand, indentation
    applies to lines and white space characters matter, and otherwise
    the programs to indent are formally invalid in general, as they
    are being edited.
36
37 The approach taken here works line by line. We receive a program
38 consisting of N lines or more, and we want to compute the
39 indentation appropriate for the Nth line. Lines beyond the Nth
40 lines are of no concern to us, so for simplicity we pretend the
41 program has exactly N lines and we call the Nth line the "bottom
42 line". Typically, we have to indent the bottom line when it's
43 still empty, so we concentrate our analysis on the N - 1 lines
44 that precede.
45
46 By inspecting the (N - 1)-th line, the (N - 2)-th line, ...
47 backwards, we determine the kind of the bottom line and indent it
48 accordingly.
49
    * The bottom line is a comment line. See
      bottomLineStartsInMultilineComment() and
      indentWhenBottomLineStartsInMultiLineComment().
    * The bottom line is a continuation line. See isContinuationLine()
      and indentForContinuationLine().
    * The bottom line is a standalone line. See
      indentForStandaloneLine().
57
58 Certain tokens that influence the indentation, notably braces,
59 are looked for in the lines. This is done by simple string
60 comparison, without a real tokenizer. Confusing constructs such
61 as comments and string literals are removed beforehand.
62 */
63
64 #include <qmljs/qmljsindenter.h>
65 #include <qmljs/qmljsscanner.h>
66
67 #include <QTextBlock>
68
69 using namespace QmlJS;
70
/*
    Saves and restores the state of the global linizer. This enables
    backtracking.

    YY_SAVE() declares a local variable named savedState that holds a
    copy of yyLinizerState; YY_RESTORE() assigns that copy back to
    yyLinizerState. YY_RESTORE() therefore only compiles in a scope
    where the savedState declared by YY_SAVE() is visible, and YY_SAVE()
    can be used at most once per scope.

    Identical to the defines in qmljslineinfo.cpp
*/
#define YY_SAVE() LinizerState savedState = yyLinizerState
#define YY_RESTORE() yyLinizerState = savedState
79
80
QmlJSIndenter()81 QmlJSIndenter::QmlJSIndenter()
82 : caseOrDefault(QRegularExpression(QLatin1String(
83 "^\\s*(?:"
84 "case\\b[^:]+|"
85 "default)"
86 "\\s*:.*$")))
87
88 {
89
90 /*
91 The indenter supports a few parameters:
92
93 * ppHardwareTabSize is the size of a '\t' in your favorite editor.
94 * ppIndentSize is the size of an indentation, or software tab
95 size.
96 * ppContinuationIndentSize is the extra indent for a continuation
97 line, when there is nothing to align against on the previous
98 line.
99 * ppCommentOffset is the indentation within a C-style comment,
100 when it cannot be picked up.
101 */
102
103 ppHardwareTabSize = 8;
104 ppIndentSize = 4;
105 ppContinuationIndentSize = 8;
106 ppCommentOffset = 2;
107 }
108
~QmlJSIndenter()109 QmlJSIndenter::~QmlJSIndenter()
110 {
111 }
112
setTabSize(int size)113 void QmlJSIndenter::setTabSize(int size)
114 {
115 ppHardwareTabSize = size;
116 }
117
setIndentSize(int size)118 void QmlJSIndenter::setIndentSize(int size)
119 {
120 ppIndentSize = size;
121 ppContinuationIndentSize = 2 * size;
122 }
123
124 /*
125 Returns true if string t is made only of white space; otherwise
126 returns false.
127 */
isOnlyWhiteSpace(const QString & t) const128 bool QmlJSIndenter::isOnlyWhiteSpace(const QString &t) const
129 {
130 return firstNonWhiteSpace(t).isNull();
131 }
132
133 /*
134 Assuming string t is a line, returns the column number of a given
135 index. Column numbers and index are identical for strings that don't
136 contain '\t's.
137 */
columnForIndex(const QString & t,int index) const138 int QmlJSIndenter::columnForIndex(const QString &t, int index) const
139 {
140 int col = 0;
141 if (index > t.length())
142 index = t.length();
143
144 for (int i = 0; i < index; i++) {
145 if (t.at(i) == QLatin1Char('\t'))
146 col = ((col / ppHardwareTabSize) + 1) * ppHardwareTabSize;
147 else
148 col++;
149 }
150 return col;
151 }
152
153 /*
154 Returns the indentation size of string t.
155 */
indentOfLine(const QString & t) const156 int QmlJSIndenter::indentOfLine(const QString &t) const
157 {
158 return columnForIndex(t, t.indexOf(firstNonWhiteSpace(t)));
159 }
160
161 /*
162 Replaces t[k] by ch, unless t[k] is '\t'. Tab characters are better
163 left alone since they break the "index equals column" rule. No
164 provisions are taken against '\n' or '\r', which shouldn't occur in
165 t anyway.
166 */
eraseChar(QString & t,int k,QChar ch) const167 void QmlJSIndenter::eraseChar(QString &t, int k, QChar ch) const
168 {
169 if (t.at(k) != QLatin1Char('\t'))
170 t[k] = ch;
171 }
172
173 /*
174 Returns '(' if the last parenthesis is opening, ')' if it is
175 closing, and QChar() if there are no parentheses in t.
176 */
lastParen() const177 QChar QmlJSIndenter::lastParen() const
178 {
179 for (int index = yyLinizerState.tokens.size() - 1; index != -1; --index) {
180 const Token &token = yyLinizerState.tokens.at(index);
181
182 if (token.is(Token::LeftParenthesis))
183 return QLatin1Char('(');
184
185 else if (token.is(Token::RightParenthesis))
186 return QLatin1Char(')');
187 }
188
189 return QChar();
190 }
191
192 /*
193 Returns true if typedIn the same as okayCh or is null; otherwise
194 returns false.
195 */
okay(QChar typedIn,QChar okayCh) const196 bool QmlJSIndenter::okay(QChar typedIn, QChar okayCh) const
197 {
198 return typedIn == QChar() || typedIn == okayCh;
199 }
200
201 /*
202 Returns the recommended indent for the bottom line of yyProgram
203 assuming that it starts in a C-style comment, a condition that is
204 tested elsewhere.
205
206 Essentially, we're trying to align against some text on the
207 previous line.
208 */
indentWhenBottomLineStartsInMultiLineComment()209 int QmlJSIndenter::indentWhenBottomLineStartsInMultiLineComment()
210 {
211 QTextBlock block = yyProgram.lastBlock().previous();
212 QString blockText;
213
214 for (; block.isValid(); block = block.previous()) {
215 blockText = block.text();
216
217 if (! isOnlyWhiteSpace(blockText))
218 break;
219 }
220
221 return indentOfLine(blockText);
222 }
223
224 /*
225 Returns the recommended indent for the bottom line of yyProgram,
226 assuming it's a continuation line.
227
228 We're trying to align the continuation line against some parenthesis
229 or other bracked left opened on a previous line, or some interesting
230 operator such as '='.
231 */
indentForContinuationLine()232 int QmlJSIndenter::indentForContinuationLine()
233 {
234 int braceDepth = 0;
235 int delimDepth = 0;
236
237 bool leftBraceFollowed = *yyLeftBraceFollows;
238
239 for (int i = 0; i < SmallRoof; i++) {
240 int hook = -1;
241
242 int j = yyLine->length();
243 while (j > 0 && hook < 0) {
244 j--;
245 QChar ch = yyLine->at(j);
246
247 switch (ch.unicode()) {
248 case ')':
249 delimDepth++;
250 break;
251 case ']':
252 braceDepth++;
253 break;
254 case '}':
255 braceDepth++;
256 break;
257 case '(':
258 delimDepth--;
259 /*
260 An unclosed delimiter is a good place to align at,
261 at least for some styles (including Qt's).
262 */
263 if (delimDepth == -1)
264 hook = j;
265 break;
266
267 case '[':
268 braceDepth--;
269 /*
270 An unclosed delimiter is a good place to align at,
271 at least for some styles (including Qt's).
272 */
273 if (braceDepth == -1)
274 hook = j;
275 break;
276 case '{':
277 braceDepth--;
278 /*
279 A left brace followed by other stuff on the same
280 line is typically for an enum or an initializer.
281 Such a brace must be treated just like the other
282 delimiters.
283 */
284 if (braceDepth == -1) {
285 if (j < yyLine->length() - 1)
286 hook = j;
287 else
288 return 0; // shouldn't happen
289 }
290 break;
291 case '=':
292 /*
293 An equal sign is a very natural alignment hook
294 because it's usually the operator with the lowest
295 precedence in statements it appears in. Case in
296 point:
297
298 int x = 1 +
299 2;
300
301 However, we have to beware of constructs such as
302 default arguments and explicit enum constant
303 values:
304
305 void foo(int x = 0,
306 int y = 0);
307
308 And not
309
310 void foo(int x = 0,
311 int y = 0);
312
313 These constructs are caracterized by a ',' at the
314 end of the unfinished lines or by unbalanced
315 parentheses.
316 */
317 Q_ASSERT(j - 1 >= 0);
318
319 if (QString::fromLatin1("!=<>").indexOf(yyLine->at(j - 1)) == -1 &&
320 j + 1 < yyLine->length() && yyLine->at(j + 1) != QLatin1Char('=')) {
321 if (braceDepth == 0 && delimDepth == 0 &&
322 j < yyLine->length() - 1 &&
323 !yyLine->endsWith(QLatin1Char(',')) &&
324 (yyLine->contains(QLatin1Char('(')) == yyLine->contains(QLatin1Char(')'))))
325 hook = j;
326 }
327 }
328 }
329
330 if (hook >= 0) {
331 /*
332 Yes, we have a delimiter or an operator to align
333 against! We don't really align against it, but rather
334 against the following token, if any. In this example,
335 the following token is "11":
336
337 int x = (11 +
338 2);
339
340 If there is no such token, we use a continuation indent:
341
342 static QRegExp foo(QString(
343 "foo foo foo foo foo foo foo foo foo"));
344 */
345 hook++;
346 while (hook < yyLine->length()) {
347 if (!yyLine->at(hook).isSpace())
348 return columnForIndex(*yyLine, hook);
349 hook++;
350 }
351 return indentOfLine(*yyLine) + ppContinuationIndentSize;
352 }
353
354 if (braceDepth != 0)
355 break;
356
357 /*
358 The line's delimiters are balanced. It looks like a
359 continuation line or something.
360 */
361 if (delimDepth == 0) {
362 if (leftBraceFollowed) {
363 /*
364 We have
365
366 int main()
367 {
368
369 or
370
371 Bar::Bar()
372 : Foo(x)
373 {
374
375 The "{" should be flush left.
376 */
377 if (!isContinuationLine())
378 return indentOfLine(*yyLine);
379 } else if (isContinuationLine() || yyLine->endsWith(QLatin1Char(','))) {
380 /*
381 We have
382
383 x = a +
384 b +
385 c;
386
387 or
388
389 int t[] = {
390 1, 2, 3,
391 4, 5, 6
392
393 The "c;" should fall right under the "b +", and the
394 "4, 5, 6" right under the "1, 2, 3,".
395 */
396 return indentOfLine(*yyLine);
397 } else {
398 /*
399 We have
400
401 stream << 1 +
402 2;
403
404 We could, but we don't, try to analyze which
405 operator has precedence over which and so on, to
406 obtain the excellent result
407
408 stream << 1 +
409 2;
410
411 We do have a special trick above for the assignment
412 operator above, though.
413 */
414 return indentOfLine(*yyLine) + ppContinuationIndentSize;
415 }
416 }
417
418 if (!readLine())
419 break;
420 }
421 return 0;
422 }
423
424 /*
425 Returns the recommended indent for the bottom line of yyProgram if
426 that line is standalone (or should be indented likewise).
427
428 Indenting a standalone line is tricky, mostly because of braceless
429 control statements. Grossly, we are looking backwards for a special
430 line, a "hook line", that we can use as a starting point to indent,
431 and then modify the indentation level according to the braces met
432 along the way to that hook.
433
434 Let's consider a few examples. In all cases, we want to indent the
435 bottom line.
436
437 Example 1:
438
439 x = 1;
440 y = 2;
441
442 The hook line is "x = 1;". We met 0 opening braces and 0 closing
443 braces. Therefore, "y = 2;" inherits the indent of "x = 1;".
444
445 Example 2:
446
447 if (x) {
448 y;
449
450 The hook line is "if (x) {". No matter what precedes it, "y;" has
451 to be indented one level deeper than the hook line, since we met one
452 opening brace along the way.
453
454 Example 3:
455
456 if (a)
457 while (b) {
458 c;
459 }
460 d;
461
462 To indent "d;" correctly, we have to go as far as the "if (a)".
463 Compare with
464
465 if (a) {
466 while (b) {
467 c;
468 }
469 d;
470
471 Still, we're striving to go back as little as possible to
472 accommodate people with irregular indentation schemes. A hook line
473 near at hand is much more reliable than a remote one.
474 */
indentForStandaloneLine()475 int QmlJSIndenter::indentForStandaloneLine()
476 {
477 for (int i = 0; i < SmallRoof; i++) {
478 if (!*yyLeftBraceFollows) {
479 YY_SAVE();
480
481 if (matchBracelessControlStatement()) {
482 /*
483 The situation is this, and we want to indent "z;":
484
485 if (x &&
486 y)
487 z;
488
489 yyLine is "if (x &&".
490 */
491 return indentOfLine(*yyLine) + ppIndentSize;
492 }
493 YY_RESTORE();
494 }
495
496 if (yyLine->endsWith(QLatin1Char(';')) || yyLine->contains(QLatin1Char('{'))) {
497 /*
498 The situation is possibly this, and we want to indent
499 "z;":
500
501 while (x)
502 y;
503 z;
504
505 We return the indent of "while (x)". In place of "y;",
506 any arbitrarily complex compound statement can appear.
507 */
508
509 if (*yyBraceDepth > 0) {
510 do {
511 if (!readLine())
512 break;
513 } while (*yyBraceDepth > 0);
514 }
515
516 LinizerState hookState;
517
518 while (isContinuationLine())
519 readLine();
520 hookState = yyLinizerState;
521
522 readLine();
523 if (*yyBraceDepth <= 0) {
524 do {
525 if (!matchBracelessControlStatement())
526 break;
527 hookState = yyLinizerState;
528 } while (readLine());
529 }
530
531 yyLinizerState = hookState;
532
533 while (isContinuationLine())
534 readLine();
535
536 int indentChange = - *yyBraceDepth;
537 if (caseOrDefault.match(*yyLine).hasMatch())
538 ++indentChange;
539
540 /*
541 Never trust lines containing only '{' or '}', as some
542 people (Richard M. Stallman) format them weirdly.
543 */
544 if (yyLine->trimmed().length() > 1)
545 return indentOfLine(*yyLine) + indentChange * ppIndentSize;
546 }
547
548 if (!readLine())
549 return -*yyBraceDepth * ppIndentSize;
550 }
551 return 0;
552 }
553
554 /*
555 Returns the recommended indent for the bottom line of program.
556 Unless null, typedIn stores the character of yyProgram that
557 triggered reindentation.
558
559 This function works better if typedIn is set properly; it is
560 slightly more conservative if typedIn is completely wild, and
561 slighly more liberal if typedIn is always null. The user might be
562 annoyed by the liberal behavior.
563 */
indentForBottomLine(QTextBlock begin,QTextBlock end,QChar typedIn)564 int QmlJSIndenter::indentForBottomLine(QTextBlock begin, QTextBlock end, QChar typedIn)
565 {
566 if (begin == end)
567 return 0;
568
569 const QTextBlock last = end.previous();
570
571 initialize(begin, last);
572
573 QString bottomLine = last.text();
574 QChar firstCh = firstNonWhiteSpace(bottomLine);
575 int indent = 0;
576
577 if (bottomLineStartsInMultilineComment()) {
578 /*
579 The bottom line starts in a C-style comment. Indent it
580 smartly, unless the user has already played around with it,
581 in which case it's better to leave her stuff alone.
582 */
583 if (isOnlyWhiteSpace(bottomLine))
584 indent = indentWhenBottomLineStartsInMultiLineComment();
585 else
586 indent = indentOfLine(bottomLine);
587 } else {
588 if (isUnfinishedLine())
589 indent = indentForContinuationLine();
590 else
591 indent = indentForStandaloneLine();
592
593 if ((okay(typedIn, QLatin1Char('}')) && firstCh == QLatin1Char('}'))
594 || (okay(typedIn, QLatin1Char(']')) && firstCh == QLatin1Char(']'))) {
595 /*
596 A closing brace is one level more to the left than the
597 code it follows.
598 */
599 indent -= ppIndentSize;
600 } else if (okay(typedIn, QLatin1Char(':'))) {
601 if (caseOrDefault.match(bottomLine).hasMatch()) {
602 /*
603 Move a case label (or the ':' in front of a
604 constructor initialization list) one level to the
605 left, but only if the user did not play around with
606 it yet. Some users have exotic tastes in the
607 matter, and most users probably are not patient
608 enough to wait for the final ':' to format their
609 code properly.
610
611 We don't attempt the same for goto labels, as the
612 user is probably the middle of "foo::bar". (Who
613 uses goto, anyway?)
614 */
615 if (indentOfLine(bottomLine) <= indent)
616 indent -= ppIndentSize;
617 else
618 indent = indentOfLine(bottomLine);
619 }
620 }
621 }
622
623 return qMax(0, indent);
624 }
625
626