1 //===--- Format.cpp -----------------------------------------*- C++-*------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "Format.h"
9 #include "support/Logger.h"
10 #include "clang/Basic/FileManager.h"
11 #include "clang/Basic/SourceManager.h"
12 #include "clang/Format/Format.h"
13 #include "clang/Lex/Lexer.h"
14 #include "clang/Tooling/Core/Replacement.h"
15 #include "llvm/Support/Unicode.h"
16 
17 namespace clang {
18 namespace clangd {
19 namespace {
20 
21 /// Append closing brackets )]} to \p Code to make it well-formed.
22 /// Clang-format conservatively refuses to format files with unmatched brackets
23 /// as it isn't sure where the errors are and so can't correct.
24 /// When editing, it's reasonable to assume code before the cursor is complete.
closeBrackets(std::string & Code,const format::FormatStyle & Style)25 void closeBrackets(std::string &Code, const format::FormatStyle &Style) {
26   SourceManagerForFile FileSM("mock_file.cpp", Code);
27   auto &SM = FileSM.get();
28   FileID FID = SM.getMainFileID();
29   Lexer Lex(FID, SM.getBufferOrFake(FID), SM,
30             format::getFormattingLangOpts(Style));
31   Token Tok;
32   std::vector<char> Brackets;
33   while (!Lex.LexFromRawLexer(Tok)) {
34     switch(Tok.getKind()) {
35       case tok::l_paren:
36         Brackets.push_back(')');
37         break;
38       case tok::l_brace:
39         Brackets.push_back('}');
40         break;
41       case tok::l_square:
42         Brackets.push_back(']');
43         break;
44       case tok::r_paren:
45         if (!Brackets.empty() && Brackets.back() == ')')
46           Brackets.pop_back();
47         break;
48       case tok::r_brace:
49         if (!Brackets.empty() && Brackets.back() == '}')
50           Brackets.pop_back();
51         break;
52       case tok::r_square:
53         if (!Brackets.empty() && Brackets.back() == ']')
54           Brackets.pop_back();
55         break;
56       default:
57         continue;
58     }
59   }
60   // Attempt to end any open comments first.
61   Code.append("\n// */\n");
62   Code.append(Brackets.rbegin(), Brackets.rend());
63 }
64 
commentMarker(llvm::StringRef Line)65 static StringRef commentMarker(llvm::StringRef Line) {
66   for (StringRef Marker : {"///", "//"}){
67     auto I = Line.rfind(Marker);
68     if (I != StringRef::npos)
69       return Line.substr(I, Marker.size());
70   }
71   return "";
72 }
73 
firstLine(llvm::StringRef Code)74 llvm::StringRef firstLine(llvm::StringRef Code) {
75   return Code.take_until([](char C) { return C == '\n'; });
76 }
77 
lastLine(llvm::StringRef Code)78 llvm::StringRef lastLine(llvm::StringRef Code) {
79   llvm::StringRef Rest = Code;
80   while (!Rest.empty() && Rest.back() != '\n')
81     Rest = Rest.drop_back();
82   return Code.substr(Rest.size());
83 }
84 
85 // Filename is needed for tooling::Replacement and some overloads of reformat().
86 // Its value should not affect the outcome. We use the default from reformat().
87 llvm::StringRef Filename = "<stdin>";
88 
89 // tooling::Replacement from overlapping StringRefs: From must be part of Code.
replacement(llvm::StringRef Code,llvm::StringRef From,llvm::StringRef To)90 tooling::Replacement replacement(llvm::StringRef Code, llvm::StringRef From,
91                                  llvm::StringRef To) {
92   assert(From.begin() >= Code.begin() && From.end() <= Code.end());
93   // The filename is required but ignored.
94   return tooling::Replacement(Filename, From.data() - Code.data(),
95                               From.size(), To);
96 }
97 
98 // High-level representation of incremental formatting changes.
99 // The changes are made in two steps.
100 // 1) a (possibly-empty) set of changes synthesized by clangd (e.g. adding
101 //    comment markers when splitting a line comment with a newline).
102 // 2) a selective clang-format run:
103 //    - the "source code" passed to clang format is the code up to the cursor,
104 //      a placeholder for the cursor, and some closing brackets
105 //    - the formatting is restricted to the cursor and (possibly) other ranges
106 //      (e.g. the old line when inserting a newline).
107 //    - changes before the cursor are applied, those after are discarded.
108 struct IncrementalChanges {
109   // Changes that should be applied before running clang-format.
110   tooling::Replacements Changes;
111   // Ranges of the original source code that should be clang-formatted.
112   // The CursorProxyText will also be formatted.
113   std::vector<tooling::Range> FormatRanges;
114   // The source code that should stand in for the cursor when clang-formatting.
115   // e.g. after inserting a newline, a line-comment at the cursor is used to
116   // ensure that the newline is preserved.
117   std::string CursorPlaceholder;
118 };
119 
120 // After a newline:
121 //  - we continue any line-comment that was split
122 //  - we format the old line in addition to the cursor
123 //  - we represent the cursor with a line comment to preserve the newline
getIncrementalChangesAfterNewline(llvm::StringRef Code,unsigned Cursor)124 IncrementalChanges getIncrementalChangesAfterNewline(llvm::StringRef Code,
125                                                      unsigned Cursor) {
126   IncrementalChanges Result;
127   // Before newline, code looked like:
128   //    leading^trailing
129   // After newline, code looks like:
130   //    leading
131   //    indentation^trailing
132   // Where indentation was added by the editor.
133   StringRef Trailing = firstLine(Code.substr(Cursor));
134   StringRef Indentation = lastLine(Code.take_front(Cursor));
135   if (Indentation.data() == Code.data()) {
136     vlog("Typed a newline, but we're still on the first line!");
137     return Result;
138   }
139   StringRef Leading =
140       lastLine(Code.take_front(Indentation.data() - Code.data() - 1));
141   StringRef NextLine = firstLine(Code.substr(Cursor + Trailing.size() + 1));
142 
143   // Strip leading whitespace on trailing line.
144   StringRef TrailingTrim = Trailing.ltrim();
145   if (unsigned TrailWS = Trailing.size() - TrailingTrim.size())
146     cantFail(Result.Changes.add(
147         replacement(Code, StringRef(Trailing.begin(), TrailWS), "")));
148 
149   // If we split a comment, replace indentation with a comment marker.
150   // If the editor made the new line a comment, also respect that.
151   StringRef CommentMarker = commentMarker(Leading);
152   bool NewLineIsComment = !commentMarker(Indentation).empty();
153   if (!CommentMarker.empty() &&
154       (NewLineIsComment || !commentMarker(NextLine).empty() ||
155        (!TrailingTrim.empty() && !TrailingTrim.startswith("//")))) {
156     using llvm::sys::unicode::columnWidthUTF8;
157     // We indent the new comment to match the previous one.
158     StringRef PreComment =
159         Leading.take_front(CommentMarker.data() - Leading.data());
160     std::string IndentAndComment =
161         (std::string(columnWidthUTF8(PreComment), ' ') + CommentMarker + " ")
162             .str();
163     cantFail(
164         Result.Changes.add(replacement(Code, Indentation, IndentAndComment)));
165   } else {
166     // Remove any indentation and let clang-format re-add it.
167     // This prevents the cursor marker dragging e.g. an aligned comment with it.
168     cantFail(Result.Changes.add(replacement(Code, Indentation, "")));
169   }
170 
171   // If we put a the newline inside a {} pair, put } on its own line...
172   if (CommentMarker.empty() && Leading.endswith("{") &&
173       Trailing.startswith("}")) {
174     cantFail(
175         Result.Changes.add(replacement(Code, Trailing.take_front(1), "\n}")));
176     // ...and format it.
177     Result.FormatRanges.push_back(
178         tooling::Range(Trailing.data() - Code.data() + 1, 1));
179   }
180 
181   // Format the whole leading line.
182   Result.FormatRanges.push_back(
183       tooling::Range(Leading.data() - Code.data(), Leading.size()));
184 
185   // We use a comment to represent the cursor, to preserve the newline.
186   // A trailing identifier improves parsing of e.g. for without braces.
187   // Exception: if the previous line has a trailing comment, we can't use one
188   // as the cursor (they will be aligned). But in this case we don't need to.
189   Result.CursorPlaceholder = !CommentMarker.empty() ? "ident" : "//==\nident";
190 
191   return Result;
192 }
193 
getIncrementalChanges(llvm::StringRef Code,unsigned Cursor,llvm::StringRef InsertedText)194 IncrementalChanges getIncrementalChanges(llvm::StringRef Code, unsigned Cursor,
195                                          llvm::StringRef InsertedText) {
196   IncrementalChanges Result;
197   if (InsertedText == "\n")
198     return getIncrementalChangesAfterNewline(Code, Cursor);
199 
200   Result.CursorPlaceholder = " /**/";
201   return Result;
202 }
203 
204 // Returns equivalent replacements that preserve the correspondence between
205 // OldCursor and NewCursor. If OldCursor lies in a replaced region, that
206 // replacement will be split.
207 std::vector<tooling::Replacement>
split(const tooling::Replacements & Replacements,unsigned OldCursor,unsigned NewCursor)208 split(const tooling::Replacements &Replacements, unsigned OldCursor,
209       unsigned NewCursor) {
210   std::vector<tooling::Replacement> Result;
211   int LengthChange = 0;
212   for (const tooling::Replacement &R : Replacements) {
213     if (R.getOffset() + R.getLength() <= OldCursor) { // before cursor
214       Result.push_back(R);
215       LengthChange += R.getReplacementText().size() - R.getLength();
216     } else if (R.getOffset() < OldCursor) { // overlaps cursor
217       int ReplacementSplit = NewCursor - LengthChange - R.getOffset();
218       assert(ReplacementSplit >= 0 &&
219              ReplacementSplit <= int(R.getReplacementText().size()) &&
220              "NewCursor incompatible with OldCursor!");
221       Result.push_back(tooling::Replacement(
222           R.getFilePath(), R.getOffset(), OldCursor - R.getOffset(),
223           R.getReplacementText().take_front(ReplacementSplit)));
224       Result.push_back(tooling::Replacement(
225           R.getFilePath(), OldCursor,
226           R.getLength() - (OldCursor - R.getOffset()),
227           R.getReplacementText().drop_front(ReplacementSplit)));
228     } else if (R.getOffset() >= OldCursor) { // after cursor
229       Result.push_back(R);
230     }
231   }
232   return Result;
233 }
234 
235 } // namespace
236 
237 // We're simulating the following sequence of changes:
238 //   - apply the pre-formatting edits (see getIncrementalChanges)
239 //   - insert a placeholder for the cursor
240 //   - format some of the resulting code
241 //   - remove the cursor placeholder again
242 // The replacements we return are produced by composing these.
243 //
244 // The text we actually pass to clang-format is slightly different from this,
245 // e.g. we have to close brackets. We ensure these differences are *after*
246 // all the regions we want to format, and discard changes in them.
247 std::vector<tooling::Replacement>
formatIncremental(llvm::StringRef OriginalCode,unsigned OriginalCursor,llvm::StringRef InsertedText,format::FormatStyle Style)248 formatIncremental(llvm::StringRef OriginalCode, unsigned OriginalCursor,
249                   llvm::StringRef InsertedText, format::FormatStyle Style) {
250   IncrementalChanges Incremental =
251       getIncrementalChanges(OriginalCode, OriginalCursor, InsertedText);
252   // Never *remove* lines in response to pressing enter! This annoys users.
253   if (InsertedText == "\n") {
254     Style.MaxEmptyLinesToKeep = 1000;
255     Style.KeepEmptyLinesAtTheStartOfBlocks = true;
256   }
257 
258   // Compute the code we want to format:
259   // 1) Start with code after the pre-formatting edits.
260   std::string CodeToFormat = cantFail(
261       tooling::applyAllReplacements(OriginalCode, Incremental.Changes));
262   unsigned Cursor = Incremental.Changes.getShiftedCodePosition(OriginalCursor);
263   // 2) Truncate code after the last interesting range.
264   unsigned FormatLimit = Cursor;
265   for (tooling::Range &R : Incremental.FormatRanges)
266     FormatLimit = std::max(FormatLimit, R.getOffset() + R.getLength());
267   CodeToFormat.resize(FormatLimit);
268   // 3) Insert a placeholder for the cursor.
269   CodeToFormat.insert(Cursor, Incremental.CursorPlaceholder);
270   // 4) Append brackets after FormatLimit so the code is well-formed.
271   closeBrackets(CodeToFormat, Style);
272 
273   // Determine the ranges to format:
274   std::vector<tooling::Range> RangesToFormat = Incremental.FormatRanges;
275   // Ranges after the cursor need to be adjusted for the placeholder.
276   for (auto &R : RangesToFormat) {
277     if (R.getOffset() > Cursor)
278       R = tooling::Range(R.getOffset() + Incremental.CursorPlaceholder.size(),
279                          R.getLength());
280   }
281   // We also format the cursor.
282   RangesToFormat.push_back(
283       tooling::Range(Cursor, Incremental.CursorPlaceholder.size()));
284   // Also update FormatLimit for the placeholder, we'll use this later.
285   FormatLimit += Incremental.CursorPlaceholder.size();
286 
287   // Run clang-format, and truncate changes at FormatLimit.
288   tooling::Replacements FormattingChanges;
289   format::FormattingAttemptStatus Status;
290   for (const tooling::Replacement &R : format::reformat(
291            Style, CodeToFormat, RangesToFormat, Filename, &Status)) {
292     if (R.getOffset() + R.getLength() <= FormatLimit) // Before limit.
293       cantFail(FormattingChanges.add(R));
294     else if(R.getOffset() < FormatLimit) { // Overlaps limit.
295       if (R.getReplacementText().empty()) // Deletions are easy to handle.
296         cantFail(FormattingChanges.add(tooling::Replacement(Filename,
297             R.getOffset(), FormatLimit - R.getOffset(), "")));
298       else
299         // Hopefully won't happen in practice?
300         elog("Incremental clang-format edit overlapping cursor @ {0}!\n{1}",
301              Cursor, CodeToFormat);
302     }
303   }
304   if (!Status.FormatComplete)
305     vlog("Incremental format incomplete at line {0}", Status.Line);
306 
307   // Now we are ready to compose the changes relative to OriginalCode.
308   //   edits -> insert placeholder -> format -> remove placeholder.
309   // We must express insert/remove as Replacements.
310   tooling::Replacements InsertCursorPlaceholder(
311       tooling::Replacement(Filename, Cursor, 0, Incremental.CursorPlaceholder));
312   unsigned FormattedCursorStart =
313                FormattingChanges.getShiftedCodePosition(Cursor),
314            FormattedCursorEnd = FormattingChanges.getShiftedCodePosition(
315                Cursor + Incremental.CursorPlaceholder.size());
316   tooling::Replacements RemoveCursorPlaceholder(
317       tooling::Replacement(Filename, FormattedCursorStart,
318                            FormattedCursorEnd - FormattedCursorStart, ""));
319 
320   // We can't simply merge() and return: tooling::Replacements will combine
321   // adjacent edits left and right of the cursor. This gives the right source
322   // code, but loses information about where the cursor is!
323   // Fortunately, none of the individual passes lose information, so:
324   //  - we use merge() to compute the final Replacements
325   //  - we chain getShiftedCodePosition() to compute final cursor position
326   //  - we split the final Replacements at the cursor position, so that
327   //    each Replacement lies either before or after the cursor.
328   tooling::Replacements Final;
329   unsigned FinalCursor = OriginalCursor;
330 #ifndef NDEBUG
331   std::string FinalCode = std::string(OriginalCode);
332   dlog("Initial code: {0}", FinalCode);
333 #endif
334   for (auto Pass :
335        std::vector<std::pair<const char *, const tooling::Replacements *>>{
336            {"Pre-formatting changes", &Incremental.Changes},
337            {"Insert placeholder", &InsertCursorPlaceholder},
338            {"clang-format", &FormattingChanges},
339            {"Remove placeholder", &RemoveCursorPlaceholder}}) {
340     Final = Final.merge(*Pass.second);
341     FinalCursor = Pass.second->getShiftedCodePosition(FinalCursor);
342 #ifndef NDEBUG
343     FinalCode =
344         cantFail(tooling::applyAllReplacements(FinalCode, *Pass.second));
345     dlog("After {0}:\n{1}^{2}", Pass.first,
346          StringRef(FinalCode).take_front(FinalCursor),
347          StringRef(FinalCode).drop_front(FinalCursor));
348 #endif
349   }
350   return split(Final, OriginalCursor, FinalCursor);
351 }
352 
353 unsigned
transformCursorPosition(unsigned Offset,const std::vector<tooling::Replacement> & Replacements)354 transformCursorPosition(unsigned Offset,
355                         const std::vector<tooling::Replacement> &Replacements) {
356   unsigned OriginalOffset = Offset;
357   for (const auto &R : Replacements) {
358     if (R.getOffset() + R.getLength() <= OriginalOffset) {
359       // Replacement is before cursor.
360       Offset += R.getReplacementText().size();
361       Offset -= R.getLength();
362     } else if (R.getOffset() < OriginalOffset) {
363       // Replacement overlaps cursor.
364       // Preserve position within replacement text, as far as possible.
365       unsigned PositionWithinReplacement = Offset - R.getOffset();
366       if (PositionWithinReplacement > R.getReplacementText().size()) {
367         Offset += R.getReplacementText().size();
368         Offset -= PositionWithinReplacement;
369       }
370     } else {
371       // Replacement after cursor.
372       break; // Replacements are sorted, the rest are also after the cursor.
373     }
374   }
375   return Offset;
376 }
377 
378 } // namespace clangd
379 } // namespace clang
380