1 //===--- Format.cpp -----------------------------------------*- C++-*------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "Format.h"
9 #include "support/Logger.h"
10 #include "clang/Basic/FileManager.h"
11 #include "clang/Basic/SourceManager.h"
12 #include "clang/Format/Format.h"
13 #include "clang/Lex/Lexer.h"
14 #include "clang/Tooling/Core/Replacement.h"
15 #include "llvm/Support/Unicode.h"
16 
17 namespace clang {
18 namespace clangd {
19 namespace {
20 
21 /// Append closing brackets )]} to \p Code to make it well-formed.
22 /// Clang-format conservatively refuses to format files with unmatched brackets
23 /// as it isn't sure where the errors are and so can't correct.
24 /// When editing, it's reasonable to assume code before the cursor is complete.
closeBrackets(std::string & Code,const format::FormatStyle & Style)25 void closeBrackets(std::string &Code, const format::FormatStyle &Style) {
26   SourceManagerForFile FileSM("dummy.cpp", Code);
27   auto &SM = FileSM.get();
28   FileID FID = SM.getMainFileID();
29   Lexer Lex(FID, SM.getBuffer(FID), SM, format::getFormattingLangOpts(Style));
30   Token Tok;
31   std::vector<char> Brackets;
32   while (!Lex.LexFromRawLexer(Tok)) {
33     switch(Tok.getKind()) {
34       case tok::l_paren:
35         Brackets.push_back(')');
36         break;
37       case tok::l_brace:
38         Brackets.push_back('}');
39         break;
40       case tok::l_square:
41         Brackets.push_back(']');
42         break;
43       case tok::r_paren:
44         if (!Brackets.empty() && Brackets.back() == ')')
45           Brackets.pop_back();
46         break;
47       case tok::r_brace:
48         if (!Brackets.empty() && Brackets.back() == '}')
49           Brackets.pop_back();
50         break;
51       case tok::r_square:
52         if (!Brackets.empty() && Brackets.back() == ']')
53           Brackets.pop_back();
54         break;
55       default:
56         continue;
57     }
58   }
59   // Attempt to end any open comments first.
60   Code.append("\n// */\n");
61   Code.append(Brackets.rbegin(), Brackets.rend());
62 }
63 
commentMarker(llvm::StringRef Line)64 static StringRef commentMarker(llvm::StringRef Line) {
65   for (StringRef Marker : {"///", "//"}){
66     auto I = Line.rfind(Marker);
67     if (I != StringRef::npos)
68       return Line.substr(I, Marker.size());
69   }
70   return "";
71 }
72 
firstLine(llvm::StringRef Code)73 llvm::StringRef firstLine(llvm::StringRef Code) {
74   return Code.take_until([](char C) { return C == '\n'; });
75 }
76 
lastLine(llvm::StringRef Code)77 llvm::StringRef lastLine(llvm::StringRef Code) {
78   llvm::StringRef Rest = Code;
79   while (!Rest.empty() && Rest.back() != '\n')
80     Rest = Rest.drop_back();
81   return Code.substr(Rest.size());
82 }
83 
84 // Filename is needed for tooling::Replacement and some overloads of reformat().
85 // Its value should not affect the outcome. We use the default from reformat().
86 llvm::StringRef Filename = "<stdin>";
87 
88 // tooling::Replacement from overlapping StringRefs: From must be part of Code.
replacement(llvm::StringRef Code,llvm::StringRef From,llvm::StringRef To)89 tooling::Replacement replacement(llvm::StringRef Code, llvm::StringRef From,
90                                  llvm::StringRef To) {
91   assert(From.begin() >= Code.begin() && From.end() <= Code.end());
92   // The filename is required but ignored.
93   return tooling::Replacement(Filename, From.data() - Code.data(),
94                               From.size(), To);
95 }
96 
97 // High-level representation of incremental formatting changes.
98 // The changes are made in two steps.
99 // 1) a (possibly-empty) set of changes synthesized by clangd (e.g. adding
100 //    comment markers when splitting a line comment with a newline).
101 // 2) a selective clang-format run:
102 //    - the "source code" passed to clang format is the code up to the cursor,
103 //      a placeholder for the cursor, and some closing brackets
104 //    - the formatting is restricted to the cursor and (possibly) other ranges
105 //      (e.g. the old line when inserting a newline).
106 //    - changes before the cursor are applied, those after are discarded.
107 struct IncrementalChanges {
108   // Changes that should be applied before running clang-format.
109   tooling::Replacements Changes;
110   // Ranges of the original source code that should be clang-formatted.
111   // The CursorProxyText will also be formatted.
112   std::vector<tooling::Range> FormatRanges;
113   // The source code that should stand in for the cursor when clang-formatting.
114   // e.g. after inserting a newline, a line-comment at the cursor is used to
115   // ensure that the newline is preserved.
116   std::string CursorPlaceholder;
117 };
118 
119 // After a newline:
120 //  - we continue any line-comment that was split
121 //  - we format the old line in addition to the cursor
122 //  - we represent the cursor with a line comment to preserve the newline
getIncrementalChangesAfterNewline(llvm::StringRef Code,unsigned Cursor)123 IncrementalChanges getIncrementalChangesAfterNewline(llvm::StringRef Code,
124                                                      unsigned Cursor) {
125   IncrementalChanges Result;
126   // Before newline, code looked like:
127   //    leading^trailing
128   // After newline, code looks like:
129   //    leading
130   //    indentation^trailing
131   // Where indentation was added by the editor.
132   StringRef Trailing = firstLine(Code.substr(Cursor));
133   StringRef Indentation = lastLine(Code.take_front(Cursor));
134   if (Indentation.data() == Code.data()) {
135     vlog("Typed a newline, but we're still on the first line!");
136     return Result;
137   }
138   StringRef Leading =
139       lastLine(Code.take_front(Indentation.data() - Code.data() - 1));
140   StringRef NextLine = firstLine(Code.substr(Cursor + Trailing.size() + 1));
141 
142   // Strip leading whitespace on trailing line.
143   StringRef TrailingTrim = Trailing.ltrim();
144   if (unsigned TrailWS = Trailing.size() - TrailingTrim.size())
145     cantFail(Result.Changes.add(
146         replacement(Code, StringRef(Trailing.begin(), TrailWS), "")));
147 
148   // If we split a comment, replace indentation with a comment marker.
149   // If the editor made the new line a comment, also respect that.
150   StringRef CommentMarker = commentMarker(Leading);
151   bool NewLineIsComment = !commentMarker(Indentation).empty();
152   if (!CommentMarker.empty() &&
153       (NewLineIsComment || !commentMarker(NextLine).empty() ||
154        (!TrailingTrim.empty() && !TrailingTrim.startswith("//")))) {
155     using llvm::sys::unicode::columnWidthUTF8;
156     // We indent the new comment to match the previous one.
157     StringRef PreComment =
158         Leading.take_front(CommentMarker.data() - Leading.data());
159     std::string IndentAndComment =
160         (std::string(columnWidthUTF8(PreComment), ' ') + CommentMarker + " ")
161             .str();
162     cantFail(
163         Result.Changes.add(replacement(Code, Indentation, IndentAndComment)));
164   } else {
165     // Remove any indentation and let clang-format re-add it.
166     // This prevents the cursor marker dragging e.g. an aligned comment with it.
167     cantFail(Result.Changes.add(replacement(Code, Indentation, "")));
168   }
169 
170   // If we put a the newline inside a {} pair, put } on its own line...
171   if (CommentMarker.empty() && Leading.endswith("{") &&
172       Trailing.startswith("}")) {
173     cantFail(
174         Result.Changes.add(replacement(Code, Trailing.take_front(1), "\n}")));
175     // ...and format it.
176     Result.FormatRanges.push_back(
177         tooling::Range(Trailing.data() - Code.data() + 1, 1));
178   }
179 
180   // Format the whole leading line.
181   Result.FormatRanges.push_back(
182       tooling::Range(Leading.data() - Code.data(), Leading.size()));
183 
184   // We use a comment to represent the cursor, to preserve the newline.
185   // A trailing identifier improves parsing of e.g. for without braces.
186   // Exception: if the previous line has a trailing comment, we can't use one
187   // as the cursor (they will be aligned). But in this case we don't need to.
188   Result.CursorPlaceholder = !CommentMarker.empty() ? "ident" : "//==\nident";
189 
190   return Result;
191 }
192 
getIncrementalChanges(llvm::StringRef Code,unsigned Cursor,llvm::StringRef InsertedText)193 IncrementalChanges getIncrementalChanges(llvm::StringRef Code, unsigned Cursor,
194                                          llvm::StringRef InsertedText) {
195   IncrementalChanges Result;
196   if (InsertedText == "\n")
197     return getIncrementalChangesAfterNewline(Code, Cursor);
198 
199   Result.CursorPlaceholder = " /**/";
200   return Result;
201 }
202 
203 // Returns equivalent replacements that preserve the correspondence between
204 // OldCursor and NewCursor. If OldCursor lies in a replaced region, that
205 // replacement will be split.
206 std::vector<tooling::Replacement>
split(const tooling::Replacements & Replacements,unsigned OldCursor,unsigned NewCursor)207 split(const tooling::Replacements &Replacements, unsigned OldCursor,
208       unsigned NewCursor) {
209   std::vector<tooling::Replacement> Result;
210   int LengthChange = 0;
211   for (const tooling::Replacement &R : Replacements) {
212     if (R.getOffset() + R.getLength() <= OldCursor) { // before cursor
213       Result.push_back(R);
214       LengthChange += R.getReplacementText().size() - R.getLength();
215     } else if (R.getOffset() < OldCursor) { // overlaps cursor
216       int ReplacementSplit = NewCursor - LengthChange - R.getOffset();
217       assert(ReplacementSplit >= 0 &&
218              ReplacementSplit <= int(R.getReplacementText().size()) &&
219              "NewCursor incompatible with OldCursor!");
220       Result.push_back(tooling::Replacement(
221           R.getFilePath(), R.getOffset(), OldCursor - R.getOffset(),
222           R.getReplacementText().take_front(ReplacementSplit)));
223       Result.push_back(tooling::Replacement(
224           R.getFilePath(), OldCursor,
225           R.getLength() - (OldCursor - R.getOffset()),
226           R.getReplacementText().drop_front(ReplacementSplit)));
227     } else if (R.getOffset() >= OldCursor) { // after cursor
228       Result.push_back(R);
229     }
230   }
231   return Result;
232 }
233 
234 } // namespace
235 
236 // We're simulating the following sequence of changes:
237 //   - apply the pre-formatting edits (see getIncrementalChanges)
238 //   - insert a placeholder for the cursor
239 //   - format some of the resulting code
240 //   - remove the cursor placeholder again
241 // The replacements we return are produced by composing these.
242 //
243 // The text we actually pass to clang-format is slightly different from this,
244 // e.g. we have to close brackets. We ensure these differences are *after*
245 // all the regions we want to format, and discard changes in them.
246 std::vector<tooling::Replacement>
formatIncremental(llvm::StringRef OriginalCode,unsigned OriginalCursor,llvm::StringRef InsertedText,format::FormatStyle Style)247 formatIncremental(llvm::StringRef OriginalCode, unsigned OriginalCursor,
248                   llvm::StringRef InsertedText, format::FormatStyle Style) {
249   IncrementalChanges Incremental =
250       getIncrementalChanges(OriginalCode, OriginalCursor, InsertedText);
251   // Never *remove* lines in response to pressing enter! This annoys users.
252   if (InsertedText == "\n") {
253     Style.MaxEmptyLinesToKeep = 1000;
254     Style.KeepEmptyLinesAtTheStartOfBlocks = true;
255   }
256 
257   // Compute the code we want to format:
258   // 1) Start with code after the pre-formatting edits.
259   std::string CodeToFormat = cantFail(
260       tooling::applyAllReplacements(OriginalCode, Incremental.Changes));
261   unsigned Cursor = Incremental.Changes.getShiftedCodePosition(OriginalCursor);
262   // 2) Truncate code after the last interesting range.
263   unsigned FormatLimit = Cursor;
264   for (tooling::Range &R : Incremental.FormatRanges)
265     FormatLimit = std::max(FormatLimit, R.getOffset() + R.getLength());
266   CodeToFormat.resize(FormatLimit);
267   // 3) Insert a placeholder for the cursor.
268   CodeToFormat.insert(Cursor, Incremental.CursorPlaceholder);
269   // 4) Append brackets after FormatLimit so the code is well-formed.
270   closeBrackets(CodeToFormat, Style);
271 
272   // Determine the ranges to format:
273   std::vector<tooling::Range> RangesToFormat = Incremental.FormatRanges;
274   // Ranges after the cursor need to be adjusted for the placeholder.
275   for (auto &R : RangesToFormat) {
276     if (R.getOffset() > Cursor)
277       R = tooling::Range(R.getOffset() + Incremental.CursorPlaceholder.size(),
278                          R.getLength());
279   }
280   // We also format the cursor.
281   RangesToFormat.push_back(
282       tooling::Range(Cursor, Incremental.CursorPlaceholder.size()));
283   // Also update FormatLimit for the placeholder, we'll use this later.
284   FormatLimit += Incremental.CursorPlaceholder.size();
285 
286   // Run clang-format, and truncate changes at FormatLimit.
287   tooling::Replacements FormattingChanges;
288   format::FormattingAttemptStatus Status;
289   for (const tooling::Replacement &R : format::reformat(
290            Style, CodeToFormat, RangesToFormat, Filename, &Status)) {
291     if (R.getOffset() + R.getLength() <= FormatLimit) // Before limit.
292       cantFail(FormattingChanges.add(R));
293     else if(R.getOffset() < FormatLimit) { // Overlaps limit.
294       if (R.getReplacementText().empty()) // Deletions are easy to handle.
295         cantFail(FormattingChanges.add(tooling::Replacement(Filename,
296             R.getOffset(), FormatLimit - R.getOffset(), "")));
297       else
298         // Hopefully won't happen in practice?
299         elog("Incremental clang-format edit overlapping cursor @ {0}!\n{1}",
300              Cursor, CodeToFormat);
301     }
302   }
303   if (!Status.FormatComplete)
304     vlog("Incremental format incomplete at line {0}", Status.Line);
305 
306   // Now we are ready to compose the changes relative to OriginalCode.
307   //   edits -> insert placeholder -> format -> remove placeholder.
308   // We must express insert/remove as Replacements.
309   tooling::Replacements InsertCursorPlaceholder(
310       tooling::Replacement(Filename, Cursor, 0, Incremental.CursorPlaceholder));
311   unsigned FormattedCursorStart =
312                FormattingChanges.getShiftedCodePosition(Cursor),
313            FormattedCursorEnd = FormattingChanges.getShiftedCodePosition(
314                Cursor + Incremental.CursorPlaceholder.size());
315   tooling::Replacements RemoveCursorPlaceholder(
316       tooling::Replacement(Filename, FormattedCursorStart,
317                            FormattedCursorEnd - FormattedCursorStart, ""));
318 
319   // We can't simply merge() and return: tooling::Replacements will combine
320   // adjacent edits left and right of the cursor. This gives the right source
321   // code, but loses information about where the cursor is!
322   // Fortunately, none of the individual passes lose information, so:
323   //  - we use merge() to compute the final Replacements
324   //  - we chain getShiftedCodePosition() to compute final cursor position
325   //  - we split the final Replacements at the cursor position, so that
326   //    each Replacement lies either before or after the cursor.
327   tooling::Replacements Final;
328   unsigned FinalCursor = OriginalCursor;
329 #ifndef NDEBUG
330   std::string FinalCode = std::string(OriginalCode);
331   dlog("Initial code: {0}", FinalCode);
332 #endif
333   for (auto Pass :
334        std::vector<std::pair<const char *, const tooling::Replacements *>>{
335            {"Pre-formatting changes", &Incremental.Changes},
336            {"Insert placeholder", &InsertCursorPlaceholder},
337            {"clang-format", &FormattingChanges},
338            {"Remove placeholder", &RemoveCursorPlaceholder}}) {
339     Final = Final.merge(*Pass.second);
340     FinalCursor = Pass.second->getShiftedCodePosition(FinalCursor);
341 #ifndef NDEBUG
342     FinalCode =
343         cantFail(tooling::applyAllReplacements(FinalCode, *Pass.second));
344     dlog("After {0}:\n{1}^{2}", Pass.first,
345          StringRef(FinalCode).take_front(FinalCursor),
346          StringRef(FinalCode).drop_front(FinalCursor));
347 #endif
348   }
349   return split(Final, OriginalCursor, FinalCursor);
350 }
351 
352 unsigned
transformCursorPosition(unsigned Offset,const std::vector<tooling::Replacement> & Replacements)353 transformCursorPosition(unsigned Offset,
354                         const std::vector<tooling::Replacement> &Replacements) {
355   unsigned OriginalOffset = Offset;
356   for (const auto &R : Replacements) {
357     if (R.getOffset() + R.getLength() <= OriginalOffset) {
358       // Replacement is before cursor.
359       Offset += R.getReplacementText().size();
360       Offset -= R.getLength();
361     } else if (R.getOffset() < OriginalOffset) {
362       // Replacement overlaps cursor.
363       // Preserve position within replacement text, as far as possible.
364       unsigned PositionWithinReplacement = Offset - R.getOffset();
365       if (PositionWithinReplacement > R.getReplacementText().size()) {
366         Offset += R.getReplacementText().size();
367         Offset -= PositionWithinReplacement;
368       }
369     } else {
370       // Replacement after cursor.
371       break; // Replacements are sorted, the rest are also after the cursor.
372     }
373   }
374   return Offset;
375 }
376 
377 } // namespace clangd
378 } // namespace clang
379