1 //===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines the \c FormatTokenSource interface, which provides a token
11 /// stream as well as the ability to manipulate the token stream.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
16 #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
17 
18 #include "FormatToken.h"
19 #include "UnwrappedLineParser.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include <cstddef>
22 
23 #define DEBUG_TYPE "format-token-source"
24 
25 namespace clang {
26 namespace format {
27 
28 // Navigate a token stream.
29 //
30 // Enables traversal of a token stream, resetting the position in a token
31 // stream, as well as inserting new tokens.
32 class FormatTokenSource {
33 public:
34   virtual ~FormatTokenSource() {}
35 
36   // Returns the next token in the token stream.
37   virtual FormatToken *getNextToken() = 0;
38 
39   // Returns the token preceding the token returned by the last call to
40   // getNextToken() in the token stream, or nullptr if no such token exists.
41   //
42   // Must not be called directly at the position directly after insertTokens()
43   // is called.
44   virtual FormatToken *getPreviousToken() = 0;
45 
46   // Returns the token that would be returned by the next call to
47   // getNextToken().
48   virtual FormatToken *peekNextToken(bool SkipComment = false) = 0;
49 
50   // Returns whether we are at the end of the file.
51   // This can be different from whether getNextToken() returned an eof token
52   // when the FormatTokenSource is a view on a part of the token stream.
53   virtual bool isEOF() = 0;
54 
55   // Gets the current position in the token stream, to be used by setPosition().
56   //
57   // Note that the value of the position is not meaningful, and specifically
58   // should not be used to get relative token positions.
59   virtual unsigned getPosition() = 0;
60 
61   // Resets the token stream to the state it was in when getPosition() returned
62   // Position, and return the token at that position in the stream.
63   virtual FormatToken *setPosition(unsigned Position) = 0;
64 
65   // Insert the given tokens before the current position.
66   // Returns the first token in \c Tokens.
67   // The next returned token will be the second token in \c Tokens.
68   // Requires the last token in Tokens to be EOF; once the EOF token is reached,
69   // the next token will be the last token returned by getNextToken();
70   //
71   // For example, given the token sequence 'a1 a2':
72   // getNextToken() -> a1
73   // insertTokens('b1 b2') -> b1
74   // getNextToken() -> b2
75   // getNextToken() -> a1
76   // getNextToken() -> a2
77   virtual FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) = 0;
78 };
79 
80 class IndexedTokenSource : public FormatTokenSource {
81 public:
82   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
83       : Tokens(Tokens), Position(-1) {}
84 
85   FormatToken *getNextToken() override {
86     if (Position >= 0 && isEOF()) {
87       LLVM_DEBUG({
88         llvm::dbgs() << "Next ";
89         dbgToken(Position);
90       });
91       return Tokens[Position];
92     }
93     Position = successor(Position);
94     LLVM_DEBUG({
95       llvm::dbgs() << "Next ";
96       dbgToken(Position);
97     });
98     return Tokens[Position];
99   }
100 
101   FormatToken *getPreviousToken() override {
102     assert(Position <= 0 || Tokens[Position - 1]->isNot(tok::eof));
103     return Position > 0 ? Tokens[Position - 1] : nullptr;
104   }
105 
106   FormatToken *peekNextToken(bool SkipComment = false) override {
107     if (isEOF())
108       return Tokens[Position];
109     int Next = successor(Position);
110     if (SkipComment)
111       while (Tokens[Next]->is(tok::comment))
112         Next = successor(Next);
113     LLVM_DEBUG({
114       llvm::dbgs() << "Peeking ";
115       dbgToken(Next);
116     });
117     return Tokens[Next];
118   }
119 
120   bool isEOF() override {
121     return Position == -1 ? false : Tokens[Position]->is(tok::eof);
122   }
123 
124   unsigned getPosition() override {
125     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
126     assert(Position >= 0);
127     return Position;
128   }
129 
130   FormatToken *setPosition(unsigned P) override {
131     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
132     Position = P;
133     return Tokens[Position];
134   }
135 
136   FormatToken *insertTokens(ArrayRef<FormatToken *> New) override {
137     assert(Position != -1);
138     assert((*New.rbegin())->Tok.is(tok::eof));
139     int Next = Tokens.size();
140     Tokens.append(New.begin(), New.end());
141     LLVM_DEBUG({
142       llvm::dbgs() << "Inserting:\n";
143       for (int I = Next, E = Tokens.size(); I != E; ++I)
144         dbgToken(I, "  ");
145       llvm::dbgs() << "  Jump from: " << (Tokens.size() - 1) << " -> "
146                    << Position << "\n";
147     });
148     Jumps[Tokens.size() - 1] = Position;
149     Position = Next;
150     LLVM_DEBUG({
151       llvm::dbgs() << "At inserted token ";
152       dbgToken(Position);
153     });
154     return Tokens[Position];
155   }
156 
157   void reset() { Position = -1; }
158 
159 private:
160   int successor(int Current) const {
161     int Next = Current + 1;
162     auto it = Jumps.find(Next);
163     if (it != Jumps.end()) {
164       Next = it->second;
165       assert(!Jumps.contains(Next));
166     }
167     return Next;
168   }
169 
170   void dbgToken(int Position, llvm::StringRef Indent = "") {
171     FormatToken *Tok = Tokens[Position];
172     llvm::dbgs() << Indent << "[" << Position
173                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
174                  << ", Macro: " << !!Tok->MacroCtx << "\n";
175   }
176 
177   SmallVector<FormatToken *> Tokens;
178   int Position;
179 
180   // Maps from position a to position b, so that when we reach a, the token
181   // stream continues at position b instead.
182   llvm::DenseMap<int, int> Jumps;
183 };
184 
185 class ScopedMacroState : public FormatTokenSource {
186 public:
187   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
188                    FormatToken *&ResetToken)
189       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
190         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
191         Token(nullptr), PreviousToken(nullptr) {
192     FakeEOF.Tok.startToken();
193     FakeEOF.Tok.setKind(tok::eof);
194     TokenSource = this;
195     Line.Level = 0;
196     Line.InPPDirective = true;
197     // InMacroBody gets set after the `#define x` part.
198   }
199 
200   ~ScopedMacroState() override {
201     TokenSource = PreviousTokenSource;
202     ResetToken = Token;
203     Line.InPPDirective = false;
204     Line.InMacroBody = false;
205     Line.Level = PreviousLineLevel;
206   }
207 
208   FormatToken *getNextToken() override {
209     // The \c UnwrappedLineParser guards against this by never calling
210     // \c getNextToken() after it has encountered the first eof token.
211     assert(!eof());
212     PreviousToken = Token;
213     Token = PreviousTokenSource->getNextToken();
214     if (eof())
215       return &FakeEOF;
216     return Token;
217   }
218 
219   FormatToken *getPreviousToken() override {
220     return PreviousTokenSource->getPreviousToken();
221   }
222 
223   FormatToken *peekNextToken(bool SkipComment) override {
224     if (eof())
225       return &FakeEOF;
226     return PreviousTokenSource->peekNextToken(SkipComment);
227   }
228 
229   bool isEOF() override { return PreviousTokenSource->isEOF(); }
230 
231   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
232 
233   FormatToken *setPosition(unsigned Position) override {
234     PreviousToken = nullptr;
235     Token = PreviousTokenSource->setPosition(Position);
236     return Token;
237   }
238 
239   FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) override {
240     llvm_unreachable("Cannot insert tokens while parsing a macro.");
241     return nullptr;
242   }
243 
244 private:
245   bool eof() {
246     return Token && Token->HasUnescapedNewline &&
247            !continuesLineComment(*Token, PreviousToken,
248                                  /*MinColumnToken=*/PreviousToken);
249   }
250 
251   FormatToken FakeEOF;
252   UnwrappedLine &Line;
253   FormatTokenSource *&TokenSource;
254   FormatToken *&ResetToken;
255   unsigned PreviousLineLevel;
256   FormatTokenSource *PreviousTokenSource;
257 
258   FormatToken *Token;
259   FormatToken *PreviousToken;
260 };
261 
262 } // namespace format
263 } // namespace clang
264 
265 #undef DEBUG_TYPE
266 
267 #endif
268