1 /****************************************************************************
2 **
3 ** Copyright (C) 2019 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the tools applications of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:GPL-EXCEPT$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
21 ** included in the packaging of this file. Please review the following
22 ** information to ensure the GNU General Public License requirements will
23 ** be met: https://www.gnu.org/licenses/gpl-3.0.html.
24 **
25 ** $QT_END_LICENSE$
26 **
27 ****************************************************************************/
28 
29 /*
30     This file is a self-contained interactive indenter for C++ and Qt
31     Script.
32 
33     The general problem of indenting a C++ program is ill posed. On
34     the one hand, an indenter has to analyze programs written in a
35     free-form formal language that is best described in terms of
36     tokens, not characters, not lines. On the other hand, indentation
37     applies to lines and white space characters matter, and otherwise
38     the programs to indent are formally invalid in general, as they
    are being edited.
40 
41     The approach taken here works line by line. We receive a program
42     consisting of N lines or more, and we want to compute the
43     indentation appropriate for the Nth line. Lines beyond the Nth
    line are of no concern to us, so for simplicity we pretend the
45     program has exactly N lines and we call the Nth line the "bottom
46     line". Typically, we have to indent the bottom line when it's
47     still empty, so we concentrate our analysis on the N - 1 lines
48     that precede.
49 
50     By inspecting the (N - 1)-th line, the (N - 2)-th line, ...
51     backwards, we determine the kind of the bottom line and indent it
52     accordingly.
53 
54       * The bottom line is a comment line. See
55         bottomLineStartsInCComment() and
56         indentWhenBottomLineStartsInCComment().
57       * The bottom line is a continuation line. See isContinuationLine()
58         and indentForContinuationLine().
59       * The bottom line is a standalone line. See
60         indentForStandaloneLine().
61 
62     Certain tokens that influence the indentation, notably braces,
63     are looked for in the lines. This is done by simple string
64     comparison, without a real tokenizer. Confusing constructs such
65     as comments and string literals are removed beforehand.
66 */
67 
68 #include <QtCore/qregexp.h>
69 #include <QtCore/qstringlist.h>
70 
71 QT_BEGIN_NAMESPACE
72 
73 /* qmake ignore Q_OBJECT */
74 
75 /*
76     The indenter avoids getting stuck in almost infinite loops by
77     imposing arbitrary limits on the number of lines it analyzes when
78     looking for a construct.
79 
80     For example, the indenter never considers more than BigRoof lines
81     backwards when looking for the start of a C-style comment.
82 */
/* Line budget for the short backward scans (control statements, hooks). */
static const int SmallRoof = 40;
/* Line budget for the long backward scan for the start of a C-style comment. */
static const int BigRoof = 400;
85 
86 /*
87     The indenter supports a few parameters:
88 
89       * ppHardwareTabSize is the size of a '\t' in your favorite editor.
90       * ppIndentSize is the size of an indentation, or software tab
91         size.
92       * ppContinuationIndentSize is the extra indent for a continuation
93         line, when there is nothing to align against on the previous
94         line.
95       * ppCommentOffset is the indentation within a C-style comment,
96         when it cannot be picked up.
97 */
98 
static int ppHardwareTabSize = 8; // columns per '\t'; set by setTabSize()
static int ppIndentSize = 4; // columns per indentation level; set by setIndentSize()
static int ppContinuationIndentSize = 8; // set to twice the indent size by setIndentSize()

static const int ppCommentOffset = 2; // fallback indent relative to a slash-aster
104 
setTabSize(int size)105 void setTabSize(int size)
106 {
107     ppHardwareTabSize = size;
108 }
109 
setIndentSize(int size)110 void setIndentSize(int size)
111 {
112     ppIndentSize = size;
113     ppContinuationIndentSize = 2 * size;
114 }
115 
/*
    Regular expressions shared by the indenter. They are allocated by
    initializeIndenter() and deleted by terminateIndenter().
*/
static QRegExp *literal = nullptr; // a quoted character or string literal
static QRegExp *label = nullptr; // a "case ...:" or "identifier:" label at the start of a line
static QRegExp *inlineCComment = nullptr; // a C-style comment opened and closed on one line
static QRegExp *braceX = nullptr; // a line starting with "} else" or "} catch"
static QRegExp *iflikeKeyword = nullptr; // one of catch, do, for, if, while
121 
122 /*
123     Returns the first non-space character in the string t, or
124     QChar::Null if the string is made only of white space.
125 */
firstNonWhiteSpace(const QString & t)126 static QChar firstNonWhiteSpace(const QString &t)
127 {
128     int i = 0;
129     while (i < t.length()) {
130         if (!t[i].isSpace())
131             return t[i];
132         i++;
133     }
134     return QChar::Null;
135 }
136 
137 /*
138     Returns \c true if string t is made only of white space; otherwise
139     returns \c false.
140 */
isOnlyWhiteSpace(const QString & t)141 static bool isOnlyWhiteSpace(const QString &t)
142 {
143     return firstNonWhiteSpace(t).isNull();
144 }
145 
146 /*
147     Assuming string t is a line, returns the column number of a given
148     index. Column numbers and index are identical for strings that don't
149     contain '\t's.
150 */
columnForIndex(const QString & t,int index)151 int columnForIndex(const QString &t, int index)
152 {
153     int col = 0;
154     if (index > t.length())
155         index = t.length();
156 
157     for (int i = 0; i < index; i++) {
158         if (t[i] == QChar('\t')) {
159             col = ((col / ppHardwareTabSize) + 1) * ppHardwareTabSize;
160         } else {
161             col++;
162         }
163     }
164     return col;
165 }
166 
167 /*
168     Returns the indentation size of string t.
169 */
indentOfLine(const QString & t)170 int indentOfLine(const QString &t)
171 {
172     return columnForIndex(t, t.indexOf(firstNonWhiteSpace(t)));
173 }
174 
175 /*
176     Replaces t[k] by ch, unless t[k] is '\t'. Tab characters are better
177     left alone since they break the "index equals column" rule. No
178     provisions are taken against '\n' or '\r', which shouldn't occur in
179     t anyway.
180 */
eraseChar(QString & t,int k,QChar ch)181 static inline void eraseChar(QString &t, int k, QChar ch)
182 {
183     if (t[k] != '\t')
184         t[k] = ch;
185 }
186 
187 /*
188    Removes some nefast constructs from a code line and returns the
189    resulting line.
190 */
trimmedCodeLine(const QString & t)191 static QString trimmedCodeLine(const QString &t)
192 {
193     QString trimmed = t;
194     int k;
195 
196     /*
197         Replace character and string literals by X's, since they may
198         contain confusing characters (such as '{' and ';'). "Hello!" is
199         replaced by XXXXXXXX. The literals are rigourously of the same
200         length before and after; otherwise, we would break alignment of
201         continuation lines.
202     */
203     k = 0;
204     while ((k = trimmed.indexOf(*literal, k)) != -1) {
205         for (int i = 0; i < literal->matchedLength(); i++)
206             eraseChar(trimmed, k + i, 'X');
207         k += literal->matchedLength();
208     }
209 
210     /*
211         Replace inline C-style comments by spaces. Other comments are
212         handled elsewhere.
213     */
214     k = 0;
215     while ((k = trimmed.indexOf(*inlineCComment, k)) != -1) {
216         for (int i = 0; i < inlineCComment->matchedLength(); i++)
217             eraseChar(trimmed, k + i, ' ');
218         k += inlineCComment->matchedLength();
219     }
220 
221     /*
222         Replace goto and switch labels by whitespace, but be careful
223         with this case:
224 
225         foo1: bar1;
226                 bar2;
227     */
228     while (trimmed.lastIndexOf(':') != -1 && trimmed.indexOf(*label) != -1) {
229         QString cap1 = label->cap(1);
230         int pos1 = label->pos(1);
231         int stop = cap1.length();
232 
233         if (pos1 + stop < trimmed.length() && ppIndentSize < stop)
234             stop = ppIndentSize;
235 
236         int i = 0;
237         while (i < stop) {
238             eraseChar(trimmed, pos1 + i, ' ');
239             i++;
240         }
241         while (i < cap1.length()) {
242             eraseChar(trimmed, pos1 + i, ';');
243             i++;
244         }
245     }
246 
247     /*
248         Remove C++-style comments.
249     */
250     k = trimmed.indexOf("//");
251     if (k != -1)
252         trimmed.truncate(k);
253 
254     return trimmed;
255 }
256 
257 /*
258     Returns '(' if the last parenthesis is opening, ')' if it is
259     closing, and QChar::Null if there are no parentheses in t.
260 */
lastParen(const QString & t)261 static inline QChar lastParen(const QString &t)
262 {
263     int i = t.length();
264     while (i > 0) {
265         i--;
266         if (t[i] == QChar('(') || t[i] == QChar(')'))
267             return t[i];
268     }
269     return QChar::Null;
270 }
271 
272 /*
273     Returns \c true if typedIn the same as okayCh or is null; otherwise
274     returns \c false.
275 */
okay(QChar typedIn,QChar okayCh)276 static inline bool okay(QChar typedIn, QChar okayCh)
277 {
278     return typedIn == QChar::Null || typedIn == okayCh;
279 }
280 
281 /*
282     The "linizer" is a group of functions and variables to iterate
283     through the source code of the program to indent. The program is
284     given as a list of strings, with the bottom line being the line
285     to indent. The actual program might contain extra lines, but
286     those are uninteresting and not passed over to us.
287 */
288 
/*
    Snapshot of the backward line iteration. It is copied wholesale by
    YY_SAVE() and YY_RESTORE() to allow backtracking.
*/
struct LinizerState
{
    QString line; // current line, as cleaned up by readLine()
    int braceDepth; // running count of '}' minus '{' seen while going backwards
    bool leftBraceFollows; // whether the line left by the last readLine() started with '{'

    QStringList::ConstIterator iter; // position of the current line in yyProgram
    bool inCComment; // whether the iteration is inside a multi-line C-style comment
    bool pendingRightBrace; // current line starts with "} else" / "} catch"; its '}' is counted later
};
299 
static QStringList *yyProgram = nullptr; // the lines being analyzed; the last one is the bottom line
static LinizerState *yyLinizerState = nullptr; // allocated by initializeIndenter()

// shorthands: read-only views into *yyLinizerState, set up by startLinizer()
static const QString *yyLine = nullptr;
static const int *yyBraceDepth = nullptr;
static const bool *yyLeftBraceFollows = nullptr;
307 
308 /*
309     Saves and restores the state of the global linizer. This enables
310     backtracking.
311 */
// Declares a local variable named savedState holding a copy of the linizer state.
#define YY_SAVE() LinizerState savedState = *yyLinizerState
// Copies savedState back; a YY_SAVE() must be in scope.
#define YY_RESTORE() *yyLinizerState = savedState
314 
315 /*
    Advances to the previous line in yyProgram and updates yyLine
317     accordingly. yyLine is cleaned from comments and other damageable
318     constructs. Empty lines are skipped.
319 */
static bool readLine()
{
    int k;

    // Record, before moving on, whether the line we are leaving starts with '{'.
    yyLinizerState->leftBraceFollows = (firstNonWhiteSpace(yyLinizerState->line) == QChar('{'));

    do {
        // Nothing above the first line: report failure with an empty line.
        if (yyLinizerState->iter == yyProgram->constBegin()) {
            yyLinizerState->line.clear();
            return false;
        }

        --yyLinizerState->iter;
        yyLinizerState->line = *yyLinizerState->iter;

        yyLinizerState->line = trimmedCodeLine(yyLinizerState->line);

        /*
            Remove C-style comments that span multiple lines. If the
            bottom line starts in a C-style comment, we are not aware
            of that and eventually yyLine will contain a slash-aster.

            Notice that both if's can be executed, since
            yyLinizerState->inCComment is potentially set to false in
            the first if. The order of the if's is also important.
        */

        if (yyLinizerState->inCComment) {
            QString slashAster("/*");

            k = yyLinizerState->line.indexOf(slashAster);
            if (k == -1) {
                // Still inside the comment: the whole line is comment text.
                yyLinizerState->line.clear();
            } else {
                // The comment opens here: keep only the code before it.
                yyLinizerState->line.truncate(k);
                yyLinizerState->inCComment = false;
            }
        }

        if (!yyLinizerState->inCComment) {
            QString asterSlash("*/");

            k = yyLinizerState->line.indexOf(asterSlash);
            if (k != -1) {
                // Going backwards, an aster-slash means we are entering a
                // comment: blank everything up to and including it.
                for (int i = 0; i < k + 2; i++)
                    eraseChar(yyLinizerState->line, i, ' ');
                yyLinizerState->inCComment = true;
            }
        }

        /*
            Remove preprocessor directives.
        */
        k = 0;
        while (k < (int)yyLinizerState->line.length()) {
            QChar ch = yyLinizerState->line[k];
            if (ch == QChar('#')) {
                // Clearing the line also ends this loop, as the length drops to 0.
                yyLinizerState->line.clear();
            } else if (!ch.isSpace()) {
                break;
            }
            k++;
        }

        /*
            Remove trailing spaces.
        */
        k = yyLinizerState->line.length();
        while (k > 0 && yyLinizerState->line[k - 1].isSpace())
            k--;
        yyLinizerState->line.truncate(k);

        /*
            '}' increments the brace depth and '{' decrements it and
            not the other way around, as we are parsing backwards.
        */
        yyLinizerState->braceDepth +=
                yyLinizerState->line.count('}') - yyLinizerState->line.count('{');

        /*
            We use a dirty trick for

                } else ...

            We don't count the '}' yet, so that it's more or less
            equivalent to the friendly construct

                }
                else ...
        */
        // Account now for a '}' that was held back on the previous call.
        if (yyLinizerState->pendingRightBrace)
            yyLinizerState->braceDepth++;
        yyLinizerState->pendingRightBrace = (yyLinizerState->line.indexOf(*braceX) == 0);
        // Hold back the leading '}' of this line until the next call.
        if (yyLinizerState->pendingRightBrace)
            yyLinizerState->braceDepth--;
    } while (yyLinizerState->line.isEmpty()); // skip lines that ended up empty

    return true;
}
419 
420 /*
421     Resets the linizer to its initial state, with yyLine containing the
422     line above the bottom line of the program.
423 */
startLinizer()424 static void startLinizer()
425 {
426     yyLinizerState->braceDepth = 0;
427     yyLinizerState->inCComment = false;
428     yyLinizerState->pendingRightBrace = false;
429 
430     yyLine = &yyLinizerState->line;
431     yyBraceDepth = &yyLinizerState->braceDepth;
432     yyLeftBraceFollows = &yyLinizerState->leftBraceFollows;
433 
434     yyLinizerState->iter = yyProgram->constEnd();
435     --yyLinizerState->iter;
436     yyLinizerState->line = *yyLinizerState->iter;
437     readLine();
438 }
439 
440 /*
441     Returns \c true if the start of the bottom line of yyProgram (and
442     potentially the whole line) is part of a C-style comment;
443     otherwise returns \c false.
444 */
bottomLineStartsInCComment()445 static bool bottomLineStartsInCComment()
446 {
447     QString slashAster("/*");
448     QString asterSlash("*/");
449 
450     /*
451         We could use the linizer here, but that would slow us down
452         terribly. We are better to trim only the code lines we need.
453     */
454     QStringList::ConstIterator p = yyProgram->constEnd();
455     --p; // skip bottom line
456 
457     for (int i = 0; i < BigRoof; i++) {
458         if (p == yyProgram->constBegin())
459             return false;
460         --p;
461 
462         if ((*p).indexOf(slashAster) != -1 || (*p).indexOf(asterSlash) != -1) {
463             QString trimmed = trimmedCodeLine(*p);
464 
465             if (trimmed.indexOf(slashAster) != -1) {
466                 return true;
467             } else if (trimmed.indexOf(asterSlash) != -1) {
468                 return false;
469             }
470         }
471     }
472     return false;
473 }
474 
475 /*
476     Returns the recommended indent for the bottom line of yyProgram
477     assuming that it starts in a C-style comment, a condition that is
478     tested elsewhere.
479 
480     Essentially, we're trying to align against some text on the
481     previous line.
482 */
indentWhenBottomLineStartsInCComment()483 static int indentWhenBottomLineStartsInCComment()
484 {
485     int k = yyLine->lastIndexOf("/*");
486     if (k == -1) {
487         /*
488           We found a normal text line in a comment. Align the
489           bottom line with the text on this line.
490         */
491         return indentOfLine(*yyLine);
492     } else {
493         /*
494           The C-style comment starts on this line. If there is
495           text on the same line, align with it. Otherwise, align
496           with the slash-aster plus a given offset.
497         */
498         int indent = columnForIndex(*yyLine, k);
499         k += 2;
500         while (k < (int)yyLine->length()) {
501             if (!(*yyLine)[k].isSpace())
502                 return columnForIndex(*yyLine, k);
503             k++;
504         }
505         return indent + ppCommentOffset;
506     }
507 }
508 
509 /*
510     A function called match...() modifies the linizer state. If it
511     returns \c true, yyLine is the top line of the matched construct;
512     otherwise, the linizer is left in an unknown state.
513 
514     A function called is...() keeps the linizer state intact.
515 */
516 
517 /*
518     Returns \c true if the current line (and upwards) forms a braceless
519     control statement; otherwise returns \c false.
520 
521     The first line of the following example is a "braceless control
522     statement":
523 
524         if ( x )
525             y;
526 */
matchBracelessControlStatement()527 static bool matchBracelessControlStatement()
528 {
529     int delimDepth = 0;
530 
531     if (yyLine->endsWith("else"))
532         return true;
533 
534     if (!yyLine->endsWith(QLatin1Char(')')))
535         return false;
536 
537     for (int i = 0; i < SmallRoof; i++) {
538         int j = yyLine->length();
539         while (j > 0) {
540             j--;
541             QChar ch = (*yyLine)[j];
542 
543             switch (ch.unicode()) {
544             case ')':
545                 delimDepth++;
546                 break;
547             case '(':
548                 delimDepth--;
549                 if (delimDepth == 0) {
550                     if (yyLine->indexOf(*iflikeKeyword) != -1) {
551                         /*
552                             We have
553 
554                                 if ( x )
555                                     y
556 
557                             "if ( x )" is not part of the statement
558                             "y".
559                         */
560                         return true;
561                     }
562                 }
563                 if (delimDepth == -1) {
564                     /*
565                       We have
566 
567                           if ( (1 +
568                                 2)
569 
570                       and not
571 
572                           if ( 1 +
573                                2 )
574                     */
575                     return false;
576                 }
577                 break;
578             case '{':
579             case '}':
580             case ';':
581                 /*
582                     We met a statement separator, but not where we
583                     expected it. What follows is probably a weird
584                     continuation line. Be careful with ';' in for,
585                     though.
586                 */
587                 if (ch != QChar(';') || delimDepth == 0)
588                     return false;
589             }
590         }
591 
592         if (!readLine())
593             break;
594     }
595     return false;
596 }
597 
598 /*
599     Returns \c true if yyLine is an unfinished line; otherwise returns
600     false.
601 
602     In many places we'll use the terms "standalone line", "unfinished
603     line" and "continuation line". The meaning of these should be
604     evident from this code example:
605 
606         a = b;    // standalone line
607         c = d +   // unfinished line
608             e +   // unfinished continuation line
609             f +   // unfinished continuation line
610             g;    // continuation line
611 */
isUnfinishedLine()612 static bool isUnfinishedLine()
613 {
614     bool unf = false;
615 
616     YY_SAVE();
617 
618     if (yyLine->isEmpty())
619         return false;
620 
621     QChar lastCh = (*yyLine)[(int)yyLine->length() - 1];
622     if (QString("{};").indexOf(lastCh) == -1 && !yyLine->endsWith("...")) {
623         /*
624           It doesn't end with ';' or similar. If it's neither
625           "Q_OBJECT" nor "if ( x )", it must be an unfinished line.
626         */
627         unf = (yyLine->indexOf("Q_OBJECT") == -1 && !matchBracelessControlStatement());
628     } else if (lastCh == QChar(';')) {
629         if (lastParen(*yyLine) == QChar('(')) {
630             /*
631               Exception:
632 
633                   for ( int i = 1; i < 10;
634             */
635             unf = true;
636         } else if (readLine() && yyLine->endsWith(QLatin1Char(';'))
637                    && lastParen(*yyLine) == QChar('(')) {
638             /*
639               Exception:
640 
641                   for ( int i = 1;
642                         i < 10;
643             */
644             unf = true;
645         }
646     }
647 
648     YY_RESTORE();
649     return unf;
650 }
651 
652 /*
653     Returns \c true if yyLine is a continuation line; otherwise returns
654     false.
655 */
isContinuationLine()656 static bool isContinuationLine()
657 {
658     bool cont = false;
659 
660     YY_SAVE();
661     if (readLine())
662         cont = isUnfinishedLine();
663     YY_RESTORE();
664     return cont;
665 }
666 
667 /*
668     Returns the recommended indent for the bottom line of yyProgram,
669     assuming it's a continuation line.
670 
671     We're trying to align the continuation line against some parenthesis
672     or other bracked left opened on a previous line, or some interesting
673     operator such as '='.
674 */
indentForContinuationLine()675 static int indentForContinuationLine()
676 {
677     int braceDepth = 0;
678     int delimDepth = 0;
679 
680     bool leftBraceFollowed = *yyLeftBraceFollows;
681 
682     for (int i = 0; i < SmallRoof; i++) {
683         int hook = -1;
684 
685         int j = yyLine->length();
686         while (j > 0 && hook < 0) {
687             j--;
688             QChar ch = (*yyLine)[j];
689 
690             switch (ch.unicode()) {
691             case ')':
692             case ']':
693                 delimDepth++;
694                 break;
695             case '}':
696                 braceDepth++;
697                 break;
698             case '(':
699             case '[':
700                 delimDepth--;
701                 /*
702                     An unclosed delimiter is a good place to align at,
703                     at least for some styles (including Qt's).
704                 */
705                 if (delimDepth == -1)
706                     hook = j;
707                 break;
708             case '{':
709                 braceDepth--;
710                 /*
711                     A left brace followed by other stuff on the same
712                     line is typically for an enum or an initializer.
713                     Such a brace must be treated just like the other
714                     delimiters.
715                 */
716                 if (braceDepth == -1) {
717                     if (j < (int)yyLine->length() - 1) {
718                         hook = j;
719                     } else {
720                         return 0; // shouldn't happen
721                     }
722                 }
723                 break;
724             case '=':
725                 /*
726                     An equal sign is a very natural alignment hook
727                     because it's usually the operator with the lowest
728                     precedence in statements it appears in. Case in
729                     point:
730 
731                         int x = 1 +
732                                 2;
733 
734                     However, we have to beware of constructs such as
735                     default arguments and explicit enum constant
736                     values:
737 
738                         void foo( int x = 0,
739                                   int y = 0 );
740 
741                     And not
742 
743                         void foo( int x = 0,
744                                         int y = 0 );
745 
746                     These constructs are caracterized by a ',' at the
747                     end of the unfinished lines or by unbalanced
748                     parentheses.
749                 */
750                 if (QString("!=<>").indexOf((*yyLine)[j - 1]) == -1 && (*yyLine)[j + 1] != '=') {
751                     if (braceDepth == 0 && delimDepth == 0 && j < (int)yyLine->length() - 1
752                         && !yyLine->endsWith(QLatin1Char(','))
753                         && (yyLine->contains('(') == yyLine->contains(')')))
754                         hook = j;
755                 }
756             }
757         }
758 
759         if (hook >= 0) {
760             /*
761                 Yes, we have a delimiter or an operator to align
762                 against! We don't really align against it, but rather
763                 against the following token, if any. In this example,
764                 the following token is "11":
765 
766                     int x = ( 11 +
767                               2 );
768 
769                 If there is no such token, we use a continuation indent:
770 
771                     static QRegExp foo( QString(
772                             "foo foo foo foo foo foo foo foo foo") );
773             */
774             hook++;
775             while (hook < (int)yyLine->length()) {
776                 if (!(*yyLine)[hook].isSpace())
777                     return columnForIndex(*yyLine, hook);
778                 hook++;
779             }
780             return indentOfLine(*yyLine) + ppContinuationIndentSize;
781         }
782 
783         if (braceDepth != 0)
784             break;
785 
786         /*
787             The line's delimiters are balanced. It looks like a
788             continuation line or something.
789         */
790         if (delimDepth == 0) {
791             if (leftBraceFollowed) {
792                 /*
793                     We have
794 
795                         int main()
796                         {
797 
798                     or
799 
800                         Bar::Bar()
801                             : Foo( x )
802                         {
803 
804                     The "{" should be flush left.
805                 */
806                 if (!isContinuationLine())
807                     return indentOfLine(*yyLine);
808             } else if (isContinuationLine() || yyLine->endsWith(QLatin1Char(','))) {
809                 /*
810                     We have
811 
812                         x = a +
813                             b +
814                             c;
815 
816                     or
817 
818                         int t[] = {
819                             1, 2, 3,
820                             4, 5, 6
821 
822                     The "c;" should fall right under the "b +", and the
823                     "4, 5, 6" right under the "1, 2, 3,".
824                 */
825                 return indentOfLine(*yyLine);
826             } else {
827                 /*
828                     We have
829 
830                         stream << 1 +
831                                 2;
832 
833                     We could, but we don't, try to analyze which
834                     operator has precedence over which and so on, to
835                     obtain the excellent result
836 
837                         stream << 1 +
838                                   2;
839 
840                     We do have a special trick above for the assignment
841                     operator above, though.
842                 */
843                 return indentOfLine(*yyLine) + ppContinuationIndentSize;
844             }
845         }
846 
847         if (!readLine())
848             break;
849     }
850     return 0;
851 }
852 
853 /*
854     Returns the recommended indent for the bottom line of yyProgram if
855     that line is standalone (or should be indented likewise).
856 
857     Indenting a standalone line is tricky, mostly because of braceless
858     control statements. Grossly, we are looking backwards for a special
859     line, a "hook line", that we can use as a starting point to indent,
860     and then modify the indentation level according to the braces met
861     along the way to that hook.
862 
863     Let's consider a few examples. In all cases, we want to indent the
864     bottom line.
865 
866     Example 1:
867 
868         x = 1;
869         y = 2;
870 
871     The hook line is "x = 1;". We met 0 opening braces and 0 closing
872     braces. Therefore, "y = 2;" inherits the indent of "x = 1;".
873 
874     Example 2:
875 
876         if ( x ) {
877             y;
878 
879     The hook line is "if ( x ) {". No matter what precedes it, "y;" has
880     to be indented one level deeper than the hook line, since we met one
881     opening brace along the way.
882 
883     Example 3:
884 
885         if ( a )
886             while ( b ) {
887                 c;
888             }
889         d;
890 
891     To indent "d;" correctly, we have to go as far as the "if ( a )".
892     Compare with
893 
894         if ( a ) {
895             while ( b ) {
896                 c;
897             }
898             d;
899 
900     Still, we're striving to go back as little as possible to
901     accommodate people with irregular indentation schemes. A hook line
902     near at hand is much more reliable than a remote one.
903 */
static int indentForStandaloneLine()
{
    // Look at up to SmallRoof lines above the bottom line for a hook line.
    for (int i = 0; i < SmallRoof; i++) {
        if (!*yyLeftBraceFollows) {
            // Saved so that a failed match can be undone below.
            YY_SAVE();

            if (matchBracelessControlStatement()) {
                /*
                    The situation is this, and we want to indent "z;":

                        if ( x &&
                             y )
                            z;

                    yyLine is "if ( x &&".
                */
                return indentOfLine(*yyLine) + ppIndentSize;
            }
            YY_RESTORE();
        }

        if (yyLine->endsWith(QLatin1Char(';')) || yyLine->contains('{')) {
            /*
                The situation is possibly this, and we want to indent
                "z;":

                    while ( x )
                        y;
                    z;

                We return the indent of "while ( x )". In place of "y;",
                any arbitrarily complex compound statement can appear.
            */

            // Go up until the brace depth is no longer positive, or
            // until there are no more lines.
            if (*yyBraceDepth > 0) {
                do {
                    if (!readLine())
                        break;
                } while (*yyBraceDepth > 0);
            }

            LinizerState hookState;

            // Go up to the first line of the statement yyLine belongs to.
            while (isContinuationLine())
                readLine();
            hookState = *yyLinizerState;

            // Keep going up for as long as the lines above are braceless
            // control statements; the topmost one becomes the hook.
            readLine();
            if (*yyBraceDepth <= 0) {
                do {
                    if (!matchBracelessControlStatement())
                        break;
                    hookState = *yyLinizerState;
                } while (readLine());
            }

            // Return to the last state recorded as a hook.
            *yyLinizerState = hookState;

            while (isContinuationLine())
                readLine();

            /*
              Never trust lines containing only '{' or '}', as some
              people (Richard M. Stallman) format them weirdly.
            */
            if (yyLine->trimmed().length() > 1)
                return indentOfLine(*yyLine) - *yyBraceDepth * ppIndentSize;
        }

        // No more lines: derive the indent from the brace depth alone.
        if (!readLine())
            return -*yyBraceDepth * ppIndentSize;
    }
    return 0;
}
978 
979 /*
980     Constructs global variables used by the indenter.
981 */
initializeIndenter()982 static void initializeIndenter()
983 {
984     literal = new QRegExp("([\"'])(?:\\\\.|[^\\\\])*\\1");
985     literal->setMinimal(true);
986     label = new QRegExp("^\\s*((?:case\\b([^:]|::)+|[a-zA-Z_0-9]+)(?:\\s+slots)?:)(?!:)");
987     inlineCComment = new QRegExp("/\\*.*\\*/");
988     inlineCComment->setMinimal(true);
989     braceX = new QRegExp("^\\s*\\}\\s*(?:else|catch)\\b");
990     iflikeKeyword = new QRegExp("\\b(?:catch|do|for|if|while)\\b");
991 
992     yyLinizerState = new LinizerState;
993 }
994 
995 /*
996     Destroys global variables used by the indenter.
997 */
terminateIndenter()998 static void terminateIndenter()
999 {
1000     delete literal;
1001     delete label;
1002     delete inlineCComment;
1003     delete braceX;
1004     delete iflikeKeyword;
1005     delete yyLinizerState;
1006 }
1007 
1008 /*
1009     Returns the recommended indent for the bottom line of program.
1010     Unless null, typedIn stores the character of yyProgram that
1011     triggered reindentation.
1012 
1013     This function works better if typedIn is set properly; it is
1014     slightly more conservative if typedIn is completely wild, and
    slightly more liberal if typedIn is always null. The user might be
1016     annoyed by the liberal behavior.
1017 */
indentForBottomLine(const QStringList & program,QChar typedIn)1018 int indentForBottomLine(const QStringList &program, QChar typedIn)
1019 {
1020     if (program.isEmpty())
1021         return 0;
1022 
1023     initializeIndenter();
1024 
1025     yyProgram = new QStringList(program);
1026     startLinizer();
1027 
1028     const QString &bottomLine = program.last();
1029     QChar firstCh = firstNonWhiteSpace(bottomLine);
1030     int indent;
1031 
1032     if (bottomLineStartsInCComment()) {
1033         /*
1034             The bottom line starts in a C-style comment. Indent it
1035             smartly, unless the user has already played around with it,
1036             in which case it's better to leave her stuff alone.
1037         */
1038         if (isOnlyWhiteSpace(bottomLine)) {
1039             indent = indentWhenBottomLineStartsInCComment();
1040         } else {
1041             indent = indentOfLine(bottomLine);
1042         }
1043     } else if (okay(typedIn, '#') && firstCh == QChar('#')) {
1044         /*
1045             Preprocessor directives go flush left.
1046         */
1047         indent = 0;
1048     } else {
1049         if (isUnfinishedLine()) {
1050             indent = indentForContinuationLine();
1051         } else {
1052             indent = indentForStandaloneLine();
1053         }
1054 
1055         if (okay(typedIn, '}') && firstCh == QChar('}')) {
1056             /*
1057                 A closing brace is one level more to the left than the
1058                 code it follows.
1059             */
1060             indent -= ppIndentSize;
1061         } else if (okay(typedIn, ':')) {
1062             QRegExp caseLabel("\\s*(?:case\\b(?:[^:]|::)+"
1063                               "|(?:public|protected|private|signals|default)(?:\\s+slots)?\\s*"
1064                               ")?:.*");
1065 
1066             if (caseLabel.exactMatch(bottomLine)) {
1067                 /*
1068                     Move a case label (or the ':' in front of a
1069                     constructor initialization list) one level to the
1070                     left, but only if the user did not play around with
1071                     it yet. Some users have exotic tastes in the
1072                     matter, and most users probably are not patient
1073                     enough to wait for the final ':' to format their
1074                     code properly.
1075 
1076                     We don't attempt the same for goto labels, as the
1077                     user is probably the middle of "foo::bar". (Who
1078                     uses goto, anyway?)
1079                 */
1080                 if (indentOfLine(bottomLine) <= indent)
1081                     indent -= ppIndentSize;
1082                 else
1083                     indent = indentOfLine(bottomLine);
1084             }
1085         }
1086     }
1087     delete yyProgram;
1088     terminateIndenter();
1089     return qMax(0, indent);
1090 }
1091 
1092 QT_END_NAMESPACE
1093 
1094 #ifdef Q_TEST_YYINDENT
1095 /*
1096   Test driver.
1097 */
1098 
1099 #    include <qfile.h>
1100 #    include <qtextstream.h>
1101 
1102 #    include <errno.h>
1103 
1104 QT_BEGIN_NAMESPACE
1105 
/*
    Returns the whole contents of the file fileName as a string.

    A warning is printed and an empty string returned if the file
    cannot be opened. A warning is also printed if the file is empty.
*/
static QString fileContents(const QString &fileName)
{
    QFile f(fileName);
    if (!f.open(QFile::ReadOnly)) {
        // Ask the QFile itself why it failed; errno is not a reliable
        // description of a QFile error.
        qWarning("yyindent error: Cannot open file '%s' for reading: %s",
                 qPrintable(fileName), qPrintable(f.errorString()));
        return QString();
    }

    QTextStream t(&f);
    QString contents = t.readAll();
    f.close();
    if (contents.isEmpty())
        qWarning("yyindent error: File '%s' is empty", qPrintable(fileName));
    return contents;
}
1122 
1123 QT_END_NAMESPACE
1124 
main(int argc,char ** argv)1125 int main(int argc, char **argv)
1126 {
1127     QT_USE_NAMESPACE
1128 
1129     if (argc != 2) {
1130         qWarning("usage: yyindent file.cpp");
1131         return 1;
1132     }
1133 
1134     QString code = fileContents(argv[1]);
1135     QStringList program = QStringList::split('\n', code, true);
1136     QStringList p;
1137     QString out;
1138 
1139     while (!program.isEmpty() && program.last().trimmed().isEmpty())
1140         program.remove(program.fromLast());
1141 
1142     QStringList::ConstIterator line = program.constBegin();
1143     while (line != program.constEnd()) {
1144         p.push_back(*line);
1145         QChar typedIn = firstNonWhiteSpace(*line);
1146         if (p.last().endsWith(QLatin1Char(':')))
1147             typedIn = ':';
1148 
1149         int indent = indentForBottomLine(p, typedIn);
1150 
1151         if (!(*line).trimmed().isEmpty()) {
1152             for (int j = 0; j < indent; j++)
1153                 out += QLatin1Char(' ');
1154             out += (*line).trimmed();
1155         }
1156         out += QLatin1Char('\n');
1157         ++line;
1158     }
1159 
1160     while (out.endsWith(QLatin1Char('\n')))
1161         out.truncate(out.length() - 1);
1162 
1163     printf("%s\n", out.toLatin1().data());
1164     return 0;
1165 }
1166 
1167 #endif // Q_TEST_YYINDENT
1168