1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of Qt Creator.
7 **
8 ** Commercial License Usage
9 ** Licensees holding valid commercial Qt licenses may use this file in
10 ** accordance with the commercial license agreement provided with the
11 ** Software or, alternatively, in accordance with the terms contained in
12 ** a written agreement between you and The Qt Company. For licensing terms
13 ** and conditions see https://www.qt.io/terms-conditions. For further
14 ** information use the contact form at https://www.qt.io/contact-us.
15 **
16 ** GNU General Public License Usage
17 ** Alternatively, this file may be used under the terms of the GNU
18 ** General Public License version 3 as published by the Free Software
19 ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
20 ** included in the packaging of this file. Please review the following
21 ** information to ensure the GNU General Public License requirements will
22 ** be met: https://www.gnu.org/licenses/gpl-3.0.html.
23 **
24 ****************************************************************************/
25 
26 /*
27     This file is a self-contained interactive indenter for Qt Script.
28 
29     The general problem of indenting a program is ill posed. On
30     the one hand, an indenter has to analyze programs written in a
31     free-form formal language that is best described in terms of
32     tokens, not characters, not lines. On the other hand, indentation
33     applies to lines and white space characters matter, and otherwise
34     the programs to indent are formally invalid in general, as they
    are being edited.
36 
37     The approach taken here works line by line. We receive a program
38     consisting of N lines or more, and we want to compute the
39     indentation appropriate for the Nth line. Lines beyond the Nth
40     lines are of no concern to us, so for simplicity we pretend the
41     program has exactly N lines and we call the Nth line the "bottom
42     line". Typically, we have to indent the bottom line when it's
43     still empty, so we concentrate our analysis on the N - 1 lines
44     that precede.
45 
46     By inspecting the (N - 1)-th line, the (N - 2)-th line, ...
47     backwards, we determine the kind of the bottom line and indent it
48     accordingly.
49 
      * The bottom line is a comment line. See
        bottomLineStartsInMultilineComment() and
        indentWhenBottomLineStartsInMultiLineComment().
53       * The bottom line is a continuation line. See isContinuationLine()
54         and indentForContinuationLine().
55       * The bottom line is a standalone line. See
56         indentForStandaloneLine().
57 
58     Certain tokens that influence the indentation, notably braces,
59     are looked for in the lines. This is done by simple string
60     comparison, without a real tokenizer. Confusing constructs such
61     as comments and string literals are removed beforehand.
62 */
63 
64 #include <qmljs/qmljsindenter.h>
65 #include <qmljs/qmljsscanner.h>
66 
67 #include <QTextBlock>
68 
69 using namespace QmlJS;
70 
/*
    Saves and restores the state of the global linizer. This enables
    backtracking.

    YY_SAVE() declares a local variable named savedState that holds a
    copy of yyLinizerState; YY_RESTORE() assigns that copy back to
    yyLinizerState. Consequently YY_RESTORE() can only be used where a
    YY_SAVE() expansion is in scope, and YY_SAVE() can be expanded at
    most once per scope.

    Identical to the defines in qmljslineinfo.cpp
*/
#define YY_SAVE() LinizerState savedState = yyLinizerState
#define YY_RESTORE() yyLinizerState = savedState
79 
80 
QmlJSIndenter()81 QmlJSIndenter::QmlJSIndenter()
82     : caseOrDefault(QRegularExpression(QLatin1String(
83             "^\\s*(?:"
84             "case\\b[^:]+|"
85             "default)"
86             "\\s*:.*$")))
87 
88 {
89 
90     /*
91         The indenter supports a few parameters:
92 
93           * ppHardwareTabSize is the size of a '\t' in your favorite editor.
94           * ppIndentSize is the size of an indentation, or software tab
95             size.
96           * ppContinuationIndentSize is the extra indent for a continuation
97             line, when there is nothing to align against on the previous
98             line.
99           * ppCommentOffset is the indentation within a C-style comment,
100             when it cannot be picked up.
101     */
102 
103     ppHardwareTabSize = 8;
104     ppIndentSize = 4;
105     ppContinuationIndentSize = 8;
106     ppCommentOffset = 2;
107 }
108 
~QmlJSIndenter()109 QmlJSIndenter::~QmlJSIndenter()
110 {
111 }
112 
setTabSize(int size)113 void QmlJSIndenter::setTabSize(int size)
114 {
115     ppHardwareTabSize = size;
116 }
117 
setIndentSize(int size)118 void QmlJSIndenter::setIndentSize(int size)
119 {
120     ppIndentSize = size;
121     ppContinuationIndentSize = 2 * size;
122 }
123 
124 /*
125     Returns true if string t is made only of white space; otherwise
126     returns false.
127 */
isOnlyWhiteSpace(const QString & t) const128 bool QmlJSIndenter::isOnlyWhiteSpace(const QString &t) const
129 {
130     return firstNonWhiteSpace(t).isNull();
131 }
132 
133 /*
134     Assuming string t is a line, returns the column number of a given
135     index. Column numbers and index are identical for strings that don't
136     contain '\t's.
137 */
columnForIndex(const QString & t,int index) const138 int QmlJSIndenter::columnForIndex(const QString &t, int index) const
139 {
140     int col = 0;
141     if (index > t.length())
142         index = t.length();
143 
144     for (int i = 0; i < index; i++) {
145         if (t.at(i) == QLatin1Char('\t'))
146             col = ((col / ppHardwareTabSize) + 1) * ppHardwareTabSize;
147         else
148             col++;
149     }
150     return col;
151 }
152 
153 /*
154     Returns the indentation size of string t.
155 */
indentOfLine(const QString & t) const156 int QmlJSIndenter::indentOfLine(const QString &t) const
157 {
158     return columnForIndex(t, t.indexOf(firstNonWhiteSpace(t)));
159 }
160 
161 /*
162     Replaces t[k] by ch, unless t[k] is '\t'. Tab characters are better
163     left alone since they break the "index equals column" rule. No
164     provisions are taken against '\n' or '\r', which shouldn't occur in
165     t anyway.
166 */
eraseChar(QString & t,int k,QChar ch) const167 void QmlJSIndenter::eraseChar(QString &t, int k, QChar ch) const
168 {
169     if (t.at(k) != QLatin1Char('\t'))
170         t[k] = ch;
171 }
172 
173 /*
174     Returns '(' if the last parenthesis is opening, ')' if it is
175     closing, and QChar() if there are no parentheses in t.
176 */
lastParen() const177 QChar QmlJSIndenter::lastParen() const
178 {
179     for (int index = yyLinizerState.tokens.size() - 1; index != -1; --index) {
180         const Token &token = yyLinizerState.tokens.at(index);
181 
182         if (token.is(Token::LeftParenthesis))
183             return QLatin1Char('(');
184 
185         else if (token.is(Token::RightParenthesis))
186             return QLatin1Char(')');
187     }
188 
189     return QChar();
190 }
191 
192 /*
193     Returns true if typedIn the same as okayCh or is null; otherwise
194     returns false.
195 */
okay(QChar typedIn,QChar okayCh) const196 bool QmlJSIndenter::okay(QChar typedIn, QChar okayCh) const
197 {
198     return typedIn == QChar() || typedIn == okayCh;
199 }
200 
201 /*
202     Returns the recommended indent for the bottom line of yyProgram
203     assuming that it starts in a C-style comment, a condition that is
204     tested elsewhere.
205 
206     Essentially, we're trying to align against some text on the
207     previous line.
208 */
indentWhenBottomLineStartsInMultiLineComment()209 int QmlJSIndenter::indentWhenBottomLineStartsInMultiLineComment()
210 {
211     QTextBlock block = yyProgram.lastBlock().previous();
212     QString blockText;
213 
214     for (; block.isValid(); block = block.previous()) {
215         blockText = block.text();
216 
217         if (! isOnlyWhiteSpace(blockText))
218             break;
219     }
220 
221     return indentOfLine(blockText);
222 }
223 
224 /*
225     Returns the recommended indent for the bottom line of yyProgram,
226     assuming it's a continuation line.
227 
228     We're trying to align the continuation line against some parenthesis
229     or other bracked left opened on a previous line, or some interesting
230     operator such as '='.
231 */
indentForContinuationLine()232 int QmlJSIndenter::indentForContinuationLine()
233 {
234     int braceDepth = 0;
235     int delimDepth = 0;
236 
237     bool leftBraceFollowed = *yyLeftBraceFollows;
238 
239     for (int i = 0; i < SmallRoof; i++) {
240         int hook = -1;
241 
242         int j = yyLine->length();
243         while (j > 0 && hook < 0) {
244             j--;
245             QChar ch = yyLine->at(j);
246 
247             switch (ch.unicode()) {
248             case ')':
249                 delimDepth++;
250                 break;
251             case ']':
252                 braceDepth++;
253                 break;
254             case '}':
255                 braceDepth++;
256                 break;
257             case '(':
258                 delimDepth--;
259                 /*
260                     An unclosed delimiter is a good place to align at,
261                     at least for some styles (including Qt's).
262                 */
263                 if (delimDepth == -1)
264                     hook = j;
265                 break;
266 
267             case '[':
268                 braceDepth--;
269                 /*
270                     An unclosed delimiter is a good place to align at,
271                     at least for some styles (including Qt's).
272                 */
273                 if (braceDepth == -1)
274                     hook = j;
275                 break;
276             case '{':
277                 braceDepth--;
278                 /*
279                     A left brace followed by other stuff on the same
280                     line is typically for an enum or an initializer.
281                     Such a brace must be treated just like the other
282                     delimiters.
283                 */
284                 if (braceDepth == -1) {
285                     if (j < yyLine->length() - 1)
286                         hook = j;
287                     else
288                         return 0; // shouldn't happen
289                 }
290                 break;
291             case '=':
292                 /*
293                     An equal sign is a very natural alignment hook
294                     because it's usually the operator with the lowest
295                     precedence in statements it appears in. Case in
296                     point:
297 
298                         int x = 1 +
299                                 2;
300 
301                     However, we have to beware of constructs such as
302                     default arguments and explicit enum constant
303                     values:
304 
305                         void foo(int x = 0,
306                                   int y = 0);
307 
308                     And not
309 
310                         void foo(int x = 0,
311                                         int y = 0);
312 
313                     These constructs are caracterized by a ',' at the
314                     end of the unfinished lines or by unbalanced
315                     parentheses.
316                 */
317                 Q_ASSERT(j - 1 >= 0);
318 
319                 if (QString::fromLatin1("!=<>").indexOf(yyLine->at(j - 1)) == -1 &&
320                      j + 1 < yyLine->length() && yyLine->at(j + 1) != QLatin1Char('=')) {
321                     if (braceDepth == 0 && delimDepth == 0 &&
322                          j < yyLine->length() - 1 &&
323                          !yyLine->endsWith(QLatin1Char(',')) &&
324                          (yyLine->contains(QLatin1Char('(')) == yyLine->contains(QLatin1Char(')'))))
325                         hook = j;
326                 }
327             }
328         }
329 
330         if (hook >= 0) {
331             /*
332                 Yes, we have a delimiter or an operator to align
333                 against! We don't really align against it, but rather
334                 against the following token, if any. In this example,
335                 the following token is "11":
336 
337                     int x = (11 +
338                               2);
339 
340                 If there is no such token, we use a continuation indent:
341 
342                     static QRegExp foo(QString(
343                             "foo foo foo foo foo foo foo foo foo"));
344             */
345             hook++;
346             while (hook < yyLine->length()) {
347                 if (!yyLine->at(hook).isSpace())
348                     return columnForIndex(*yyLine, hook);
349                 hook++;
350             }
351             return indentOfLine(*yyLine) + ppContinuationIndentSize;
352         }
353 
354         if (braceDepth != 0)
355             break;
356 
357         /*
358             The line's delimiters are balanced. It looks like a
359             continuation line or something.
360         */
361         if (delimDepth == 0) {
362             if (leftBraceFollowed) {
363                 /*
364                     We have
365 
366                         int main()
367                         {
368 
369                     or
370 
371                         Bar::Bar()
372                             : Foo(x)
373                         {
374 
375                     The "{" should be flush left.
376                 */
377                 if (!isContinuationLine())
378                     return indentOfLine(*yyLine);
379             } else if (isContinuationLine() || yyLine->endsWith(QLatin1Char(','))) {
380                 /*
381                     We have
382 
383                         x = a +
384                             b +
385                             c;
386 
387                     or
388 
389                         int t[] = {
390                             1, 2, 3,
391                             4, 5, 6
392 
393                     The "c;" should fall right under the "b +", and the
394                     "4, 5, 6" right under the "1, 2, 3,".
395                 */
396                 return indentOfLine(*yyLine);
397             } else {
398                 /*
399                     We have
400 
401                         stream << 1 +
402                                 2;
403 
404                     We could, but we don't, try to analyze which
405                     operator has precedence over which and so on, to
406                     obtain the excellent result
407 
408                         stream << 1 +
409                                   2;
410 
411                     We do have a special trick above for the assignment
412                     operator above, though.
413                 */
414                 return indentOfLine(*yyLine) + ppContinuationIndentSize;
415             }
416         }
417 
418         if (!readLine())
419             break;
420     }
421     return 0;
422 }
423 
424 /*
425     Returns the recommended indent for the bottom line of yyProgram if
426     that line is standalone (or should be indented likewise).
427 
428     Indenting a standalone line is tricky, mostly because of braceless
429     control statements. Grossly, we are looking backwards for a special
430     line, a "hook line", that we can use as a starting point to indent,
431     and then modify the indentation level according to the braces met
432     along the way to that hook.
433 
434     Let's consider a few examples. In all cases, we want to indent the
435     bottom line.
436 
437     Example 1:
438 
439         x = 1;
440         y = 2;
441 
442     The hook line is "x = 1;". We met 0 opening braces and 0 closing
443     braces. Therefore, "y = 2;" inherits the indent of "x = 1;".
444 
445     Example 2:
446 
447         if (x) {
448             y;
449 
450     The hook line is "if (x) {". No matter what precedes it, "y;" has
451     to be indented one level deeper than the hook line, since we met one
452     opening brace along the way.
453 
454     Example 3:
455 
456         if (a)
457             while (b) {
458                 c;
459             }
460         d;
461 
462     To indent "d;" correctly, we have to go as far as the "if (a)".
463     Compare with
464 
465         if (a) {
466             while (b) {
467                 c;
468             }
469             d;
470 
471     Still, we're striving to go back as little as possible to
472     accommodate people with irregular indentation schemes. A hook line
473     near at hand is much more reliable than a remote one.
474 */
indentForStandaloneLine()475 int QmlJSIndenter::indentForStandaloneLine()
476 {
477     for (int i = 0; i < SmallRoof; i++) {
478         if (!*yyLeftBraceFollows) {
479             YY_SAVE();
480 
481             if (matchBracelessControlStatement()) {
482                 /*
483                     The situation is this, and we want to indent "z;":
484 
485                         if (x &&
486                              y)
487                             z;
488 
489                     yyLine is "if (x &&".
490                 */
491                 return indentOfLine(*yyLine) + ppIndentSize;
492             }
493             YY_RESTORE();
494         }
495 
496         if (yyLine->endsWith(QLatin1Char(';')) || yyLine->contains(QLatin1Char('{'))) {
497             /*
498                 The situation is possibly this, and we want to indent
499                 "z;":
500 
501                     while (x)
502                         y;
503                     z;
504 
505                 We return the indent of "while (x)". In place of "y;",
506                 any arbitrarily complex compound statement can appear.
507             */
508 
509             if (*yyBraceDepth > 0) {
510                 do {
511                     if (!readLine())
512                         break;
513                 } while (*yyBraceDepth > 0);
514             }
515 
516             LinizerState hookState;
517 
518             while (isContinuationLine())
519                 readLine();
520             hookState = yyLinizerState;
521 
522             readLine();
523             if (*yyBraceDepth <= 0) {
524                 do {
525                     if (!matchBracelessControlStatement())
526                         break;
527                     hookState = yyLinizerState;
528                 } while (readLine());
529             }
530 
531             yyLinizerState = hookState;
532 
533             while (isContinuationLine())
534                 readLine();
535 
536             int indentChange = - *yyBraceDepth;
537             if (caseOrDefault.match(*yyLine).hasMatch())
538                 ++indentChange;
539 
540             /*
541               Never trust lines containing only '{' or '}', as some
542               people (Richard M. Stallman) format them weirdly.
543             */
544             if (yyLine->trimmed().length() > 1)
545                 return indentOfLine(*yyLine) + indentChange * ppIndentSize;
546         }
547 
548         if (!readLine())
549             return -*yyBraceDepth * ppIndentSize;
550     }
551     return 0;
552 }
553 
554 /*
555     Returns the recommended indent for the bottom line of program.
556     Unless null, typedIn stores the character of yyProgram that
557     triggered reindentation.
558 
559     This function works better if typedIn is set properly; it is
560     slightly more conservative if typedIn is completely wild, and
561     slighly more liberal if typedIn is always null. The user might be
562     annoyed by the liberal behavior.
563 */
indentForBottomLine(QTextBlock begin,QTextBlock end,QChar typedIn)564 int QmlJSIndenter::indentForBottomLine(QTextBlock begin, QTextBlock end, QChar typedIn)
565 {
566     if (begin == end)
567         return 0;
568 
569     const QTextBlock last = end.previous();
570 
571     initialize(begin, last);
572 
573     QString bottomLine = last.text();
574     QChar firstCh = firstNonWhiteSpace(bottomLine);
575     int indent = 0;
576 
577     if (bottomLineStartsInMultilineComment()) {
578         /*
579             The bottom line starts in a C-style comment. Indent it
580             smartly, unless the user has already played around with it,
581             in which case it's better to leave her stuff alone.
582         */
583         if (isOnlyWhiteSpace(bottomLine))
584             indent = indentWhenBottomLineStartsInMultiLineComment();
585         else
586             indent = indentOfLine(bottomLine);
587     } else {
588         if (isUnfinishedLine())
589             indent = indentForContinuationLine();
590         else
591             indent = indentForStandaloneLine();
592 
593         if ((okay(typedIn, QLatin1Char('}')) && firstCh == QLatin1Char('}'))
594             || (okay(typedIn, QLatin1Char(']')) && firstCh == QLatin1Char(']'))) {
595             /*
596                 A closing brace is one level more to the left than the
597                 code it follows.
598             */
599             indent -= ppIndentSize;
600         } else if (okay(typedIn, QLatin1Char(':'))) {
601             if (caseOrDefault.match(bottomLine).hasMatch()) {
602                 /*
603                     Move a case label (or the ':' in front of a
604                     constructor initialization list) one level to the
605                     left, but only if the user did not play around with
606                     it yet. Some users have exotic tastes in the
607                     matter, and most users probably are not patient
608                     enough to wait for the final ':' to format their
609                     code properly.
610 
611                     We don't attempt the same for goto labels, as the
612                     user is probably the middle of "foo::bar". (Who
613                     uses goto, anyway?)
614                 */
615                 if (indentOfLine(bottomLine) <= indent)
616                     indent -= ppIndentSize;
617                 else
618                     indent = indentOfLine(bottomLine);
619             }
620         }
621     }
622 
623     return qMax(0, indent);
624 }
625 
626