1 //===-- clang-format/ClangFormat.cpp - Clang format tool ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a clang-format tool that automatically formats
11 /// (fragments of) C++ code.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Basic/Diagnostic.h"
16 #include "clang/Basic/DiagnosticOptions.h"
17 #include "clang/Basic/FileManager.h"
18 #include "clang/Basic/SourceManager.h"
19 #include "clang/Basic/Version.h"
20 #include "clang/Format/Format.h"
21 #include "clang/Rewrite/Core/Rewriter.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/FileSystem.h"
24 #include "llvm/Support/InitLLVM.h"
25 #include "llvm/Support/Process.h"
26 
27 using namespace llvm;
28 using clang::tooling::Replacements;
29 
// Hidden short spelling of -help; main() checks it and prints the help text.
static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden);

// Mark all our options with this category, everything else (except for -version
// and -help) will be hidden.
static cl::OptionCategory ClangFormatCategory("Clang-format options");

// Byte offsets of the ranges to format. fillRanges() pairs entry i with
// Lengths[i]; if no offset is given at all, fillRanges() inserts offset 0.
static cl::list<unsigned>
    Offsets("offset",
            cl::desc("Format a range starting at this byte offset.\n"
                     "Multiple ranges can be formatted by specifying\n"
                     "several -offset and -length pairs.\n"
                     "Can only be used with one input file."),
            cl::cat(ClangFormatCategory));
// Byte lengths of the ranges to format, matched by index with Offsets. May be
// left empty when exactly one offset is used (the range then runs to the end
// of the file).
static cl::list<unsigned>
    Lengths("length",
            cl::desc("Format a range of this length (in bytes).\n"
                     "Multiple ranges can be formatted by specifying\n"
                     "several -offset and -length pairs.\n"
                     "When only a single -offset is specified without\n"
                     "-length, clang-format will format up to the end\n"
                     "of the file.\n"
                     "Can only be used with one input file."),
            cl::cat(ClangFormatCategory));
// Line ranges to format, each given as "<start line>:<end line>". Parsed by
// parseLineRange(); fillRanges() rejects combining them with -offset/-length.
static cl::list<std::string>
LineRanges("lines", cl::desc("<start line>:<end line> - format a range of\n"
                             "lines (both 1-based).\n"
                             "Multiple ranges can be formatted by specifying\n"
                             "several -lines arguments.\n"
                             "Can't be used with -offset and -length.\n"
                             "Can only be used with one input file."),
           cl::cat(ClangFormatCategory));
// Style specification handed to getStyle(); defaults to
// clang::format::DefaultFormatStyle.
static cl::opt<std::string>
    Style("style", cl::desc(clang::format::StyleOptionHelpDescription),
          cl::init(clang::format::DefaultFormatStyle),
          cl::cat(ClangFormatCategory));
// Fallback style handed to getStyle(); defaults to
// clang::format::DefaultFallbackStyle.
static cl::opt<std::string>
    FallbackStyle("fallback-style",
                  cl::desc("The name of the predefined style used as a\n"
                           "fallback in case clang-format is invoked with\n"
                           "-style=file, but can not find the .clang-format\n"
                           "file to use.\n"
                           "Use -fallback-style=none to skip formatting."),
                  cl::init(clang::format::DefaultFallbackStyle),
                  cl::cat(ClangFormatCategory));

// File name to use in place of "-" when the input comes from stdin; defaults
// to "<stdin>".
static cl::opt<std::string>
AssumeFileName("assume-filename",
               cl::desc("When reading from stdin, clang-format assumes this\n"
                        "filename to look for a style config file (with\n"
                        "-style=file) and to determine the language."),
               cl::init("<stdin>"), cl::cat(ClangFormatCategory));

// Rewrite the input files instead of printing the result. format() ignores
// this flag when -output-replacements-xml is also given.
static cl::opt<bool> Inplace("i",
                             cl::desc("Inplace edit <file>s, if specified."),
                             cl::cat(ClangFormatCategory));

// Print the computed replacements as XML instead of the formatted code.
static cl::opt<bool> OutputXML("output-replacements-xml",
                               cl::desc("Output replacements as XML."),
                               cl::cat(ClangFormatCategory));
// Print the effective style configuration and exit; handled in main() before
// any formatting takes place.
static cl::opt<bool>
    DumpConfig("dump-config",
               cl::desc("Dump configuration options to stdout and exit.\n"
                        "Can be used with -style option."),
               cl::cat(ClangFormatCategory));
// Cursor position supplied by an editor. When the option is present, format()
// reports the position shifted by the applied replacements.
static cl::opt<unsigned>
    Cursor("cursor",
           cl::desc("The position of the cursor when invoking\n"
                    "clang-format from an editor integration"),
           cl::init(0), cl::cat(ClangFormatCategory));

// Overrides FormatStyle::SortIncludes, but only when the option was actually
// passed on the command line (format() checks getNumOccurrences()).
static cl::opt<bool> SortIncludes(
    "sort-includes",
    cl::desc("If set, overrides the include sorting behavior determined by the "
             "SortIncludes style flag"),
    cl::cat(ClangFormatCategory));

// When set, main() prints "Formatting <file>" to stderr before each file.
static cl::opt<bool>
    Verbose("verbose", cl::desc("If set, shows the list of processed files"),
            cl::cat(ClangFormatCategory));

// Positional arguments: the files to format. When empty, main() formats
// standard input instead.
static cl::list<std::string> FileNames(cl::Positional, cl::desc("[<file> ...]"),
                                       cl::cat(ClangFormatCategory));
112 
113 namespace clang {
114 namespace format {
115 
116 static FileID createInMemoryFile(StringRef FileName, MemoryBuffer *Source,
117                                  SourceManager &Sources, FileManager &Files,
118                                  llvm::vfs::InMemoryFileSystem *MemFS) {
119   MemFS->addFileNoOwn(FileName, 0, Source);
120   return Sources.createFileID(Files.getFile(FileName), SourceLocation(),
121                               SrcMgr::C_User);
122 }
123 
124 // Parses <start line>:<end line> input to a pair of line numbers.
125 // Returns true on error.
126 static bool parseLineRange(StringRef Input, unsigned &FromLine,
127                            unsigned &ToLine) {
128   std::pair<StringRef, StringRef> LineRange = Input.split(':');
129   return LineRange.first.getAsInteger(0, FromLine) ||
130          LineRange.second.getAsInteger(0, ToLine);
131 }
132 
133 static bool fillRanges(MemoryBuffer *Code,
134                        std::vector<tooling::Range> &Ranges) {
135   IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
136       new llvm::vfs::InMemoryFileSystem);
137   FileManager Files(FileSystemOptions(), InMemoryFileSystem);
138   DiagnosticsEngine Diagnostics(
139       IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
140       new DiagnosticOptions);
141   SourceManager Sources(Diagnostics, Files);
142   FileID ID = createInMemoryFile("<irrelevant>", Code, Sources, Files,
143                                  InMemoryFileSystem.get());
144   if (!LineRanges.empty()) {
145     if (!Offsets.empty() || !Lengths.empty()) {
146       errs() << "error: cannot use -lines with -offset/-length\n";
147       return true;
148     }
149 
150     for (unsigned i = 0, e = LineRanges.size(); i < e; ++i) {
151       unsigned FromLine, ToLine;
152       if (parseLineRange(LineRanges[i], FromLine, ToLine)) {
153         errs() << "error: invalid <start line>:<end line> pair\n";
154         return true;
155       }
156       if (FromLine > ToLine) {
157         errs() << "error: start line should be less than end line\n";
158         return true;
159       }
160       SourceLocation Start = Sources.translateLineCol(ID, FromLine, 1);
161       SourceLocation End = Sources.translateLineCol(ID, ToLine, UINT_MAX);
162       if (Start.isInvalid() || End.isInvalid())
163         return true;
164       unsigned Offset = Sources.getFileOffset(Start);
165       unsigned Length = Sources.getFileOffset(End) - Offset;
166       Ranges.push_back(tooling::Range(Offset, Length));
167     }
168     return false;
169   }
170 
171   if (Offsets.empty())
172     Offsets.push_back(0);
173   if (Offsets.size() != Lengths.size() &&
174       !(Offsets.size() == 1 && Lengths.empty())) {
175     errs() << "error: number of -offset and -length arguments must match.\n";
176     return true;
177   }
178   for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
179     if (Offsets[i] >= Code->getBufferSize()) {
180       errs() << "error: offset " << Offsets[i] << " is outside the file\n";
181       return true;
182     }
183     SourceLocation Start =
184         Sources.getLocForStartOfFile(ID).getLocWithOffset(Offsets[i]);
185     SourceLocation End;
186     if (i < Lengths.size()) {
187       if (Offsets[i] + Lengths[i] > Code->getBufferSize()) {
188         errs() << "error: invalid length " << Lengths[i]
189                << ", offset + length (" << Offsets[i] + Lengths[i]
190                << ") is outside the file.\n";
191         return true;
192       }
193       End = Start.getLocWithOffset(Lengths[i]);
194     } else {
195       End = Sources.getLocForEndOfFile(ID);
196     }
197     unsigned Offset = Sources.getFileOffset(Start);
198     unsigned Length = Sources.getFileOffset(End) - Offset;
199     Ranges.push_back(tooling::Range(Offset, Length));
200   }
201   return false;
202 }
203 
204 static void outputReplacementXML(StringRef Text) {
205   // FIXME: When we sort includes, we need to make sure the stream is correct
206   // utf-8.
207   size_t From = 0;
208   size_t Index;
209   while ((Index = Text.find_first_of("\n\r<&", From)) != StringRef::npos) {
210     outs() << Text.substr(From, Index - From);
211     switch (Text[Index]) {
212     case '\n':
213       outs() << "&#10;";
214       break;
215     case '\r':
216       outs() << "&#13;";
217       break;
218     case '<':
219       outs() << "&lt;";
220       break;
221     case '&':
222       outs() << "&amp;";
223       break;
224     default:
225       llvm_unreachable("Unexpected character encountered!");
226     }
227     From = Index + 1;
228   }
229   outs() << Text.substr(From);
230 }
231 
232 static void outputReplacementsXML(const Replacements &Replaces) {
233   for (const auto &R : Replaces) {
234     outs() << "<replacement "
235            << "offset='" << R.getOffset() << "' "
236            << "length='" << R.getLength() << "'>";
237     outputReplacementXML(R.getReplacementText());
238     outs() << "</replacement>\n";
239   }
240 }
241 
242 // Returns true on error.
243 static bool format(StringRef FileName) {
244   if (!OutputXML && Inplace && FileName == "-") {
245     errs() << "error: cannot use -i when reading from stdin.\n";
246     return false;
247   }
248   // On Windows, overwriting a file with an open file mapping doesn't work,
249   // so read the whole file into memory when formatting in-place.
250   ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
251       !OutputXML && Inplace ? MemoryBuffer::getFileAsStream(FileName) :
252                               MemoryBuffer::getFileOrSTDIN(FileName);
253   if (std::error_code EC = CodeOrErr.getError()) {
254     errs() << EC.message() << "\n";
255     return true;
256   }
257   std::unique_ptr<llvm::MemoryBuffer> Code = std::move(CodeOrErr.get());
258   if (Code->getBufferSize() == 0)
259     return false; // Empty files are formatted correctly.
260 
261   // Check to see if the buffer has a UTF Byte Order Mark (BOM).
262   // We only support UTF-8 with and without a BOM right now.  See
263   // https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding
264   // for more information.
265   StringRef BufStr = Code->getBuffer();
266   const char *InvalidBOM = llvm::StringSwitch<const char *>(BufStr)
267     .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"),
268                                                   "UTF-32 (BE)")
269     .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"),
270                                                   "UTF-32 (LE)")
271     .StartsWith("\xFE\xFF", "UTF-16 (BE)")
272     .StartsWith("\xFF\xFE", "UTF-16 (LE)")
273     .StartsWith("\x2B\x2F\x76", "UTF-7")
274     .StartsWith("\xF7\x64\x4C", "UTF-1")
275     .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC")
276     .StartsWith("\x0E\xFE\xFF", "SCSU")
277     .StartsWith("\xFB\xEE\x28", "BOCU-1")
278     .StartsWith("\x84\x31\x95\x33", "GB-18030")
279     .Default(nullptr);
280 
281   if (InvalidBOM) {
282     errs() << "error: encoding with unsupported byte order mark \""
283            << InvalidBOM << "\" detected";
284     if (FileName != "-")
285       errs() << " in file '" << FileName << "'";
286     errs() << ".\n";
287     return true;
288   }
289 
290   std::vector<tooling::Range> Ranges;
291   if (fillRanges(Code.get(), Ranges))
292     return true;
293   StringRef AssumedFileName = (FileName == "-") ? AssumeFileName : FileName;
294 
295   llvm::Expected<FormatStyle> FormatStyle =
296       getStyle(Style, AssumedFileName, FallbackStyle, Code->getBuffer());
297   if (!FormatStyle) {
298     llvm::errs() << llvm::toString(FormatStyle.takeError()) << "\n";
299     return true;
300   }
301 
302   if (SortIncludes.getNumOccurrences() != 0)
303     FormatStyle->SortIncludes = SortIncludes;
304   unsigned CursorPosition = Cursor;
305   Replacements Replaces = sortIncludes(*FormatStyle, Code->getBuffer(), Ranges,
306                                        AssumedFileName, &CursorPosition);
307   auto ChangedCode = tooling::applyAllReplacements(Code->getBuffer(), Replaces);
308   if (!ChangedCode) {
309     llvm::errs() << llvm::toString(ChangedCode.takeError()) << "\n";
310     return true;
311   }
312   // Get new affected ranges after sorting `#includes`.
313   Ranges = tooling::calculateRangesAfterReplacements(Replaces, Ranges);
314   FormattingAttemptStatus Status;
315   Replacements FormatChanges = reformat(*FormatStyle, *ChangedCode, Ranges,
316                                         AssumedFileName, &Status);
317   Replaces = Replaces.merge(FormatChanges);
318   if (OutputXML) {
319     outs() << "<?xml version='1.0'?>\n<replacements "
320               "xml:space='preserve' incomplete_format='"
321            << (Status.FormatComplete ? "false" : "true") << "'";
322     if (!Status.FormatComplete)
323       outs() << " line='" << Status.Line << "'";
324     outs() << ">\n";
325     if (Cursor.getNumOccurrences() != 0)
326       outs() << "<cursor>"
327              << FormatChanges.getShiftedCodePosition(CursorPosition)
328              << "</cursor>\n";
329 
330     outputReplacementsXML(Replaces);
331     outs() << "</replacements>\n";
332   } else {
333     IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
334         new llvm::vfs::InMemoryFileSystem);
335     FileManager Files(FileSystemOptions(), InMemoryFileSystem);
336     DiagnosticsEngine Diagnostics(
337         IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
338         new DiagnosticOptions);
339     SourceManager Sources(Diagnostics, Files);
340     FileID ID = createInMemoryFile(AssumedFileName, Code.get(), Sources, Files,
341                                    InMemoryFileSystem.get());
342     Rewriter Rewrite(Sources, LangOptions());
343     tooling::applyAllReplacements(Replaces, Rewrite);
344     if (Inplace) {
345       if (Rewrite.overwriteChangedFiles())
346         return true;
347     } else {
348       if (Cursor.getNumOccurrences() != 0) {
349         outs() << "{ \"Cursor\": "
350                << FormatChanges.getShiftedCodePosition(CursorPosition)
351                << ", \"IncompleteFormat\": "
352                << (Status.FormatComplete ? "false" : "true");
353         if (!Status.FormatComplete)
354           outs() << ", \"Line\": " << Status.Line;
355         outs() << " }\n";
356       }
357       Rewrite.getEditBuffer(ID).write(outs());
358     }
359   }
360   return false;
361 }
362 
363 }  // namespace format
364 }  // namespace clang
365 
366 static void PrintVersion(raw_ostream &OS) {
367   OS << clang::getClangToolFullVersion("clang-format") << '\n';
368 }
369 
370 int main(int argc, const char **argv) {
371   llvm::InitLLVM X(argc, argv);
372 
373   cl::HideUnrelatedOptions(ClangFormatCategory);
374 
375   cl::SetVersionPrinter(PrintVersion);
376   cl::ParseCommandLineOptions(
377       argc, argv,
378       "A tool to format C/C++/Java/JavaScript/Objective-C/Protobuf/C# code.\n\n"
379       "If no arguments are specified, it formats the code from standard input\n"
380       "and writes the result to the standard output.\n"
381       "If <file>s are given, it reformats the files. If -i is specified\n"
382       "together with <file>s, the files are edited in-place. Otherwise, the\n"
383       "result is written to the standard output.\n");
384 
385   if (Help) {
386     cl::PrintHelpMessage();
387     return 0;
388   }
389 
390   if (DumpConfig) {
391     StringRef FileName;
392     std::unique_ptr<llvm::MemoryBuffer> Code;
393     if (FileNames.empty()) {
394       // We can't read the code to detect the language if there's no
395       // file name, so leave Code empty here.
396       FileName = AssumeFileName;
397     } else {
398       // Read in the code in case the filename alone isn't enough to
399       // detect the language.
400       ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
401           MemoryBuffer::getFileOrSTDIN(FileNames[0]);
402       if (std::error_code EC = CodeOrErr.getError()) {
403         llvm::errs() << EC.message() << "\n";
404         return 1;
405       }
406       FileName = (FileNames[0] == "-") ? AssumeFileName : FileNames[0];
407       Code = std::move(CodeOrErr.get());
408     }
409     llvm::Expected<clang::format::FormatStyle> FormatStyle =
410         clang::format::getStyle(Style, FileName, FallbackStyle,
411                                 Code ? Code->getBuffer() : "");
412     if (!FormatStyle) {
413       llvm::errs() << llvm::toString(FormatStyle.takeError()) << "\n";
414       return 1;
415     }
416     std::string Config = clang::format::configurationAsText(*FormatStyle);
417     outs() << Config << "\n";
418     return 0;
419   }
420 
421   bool Error = false;
422   if (FileNames.empty()) {
423     Error = clang::format::format("-");
424     return Error ? 1 : 0;
425   }
426   if (FileNames.size() != 1 && (!Offsets.empty() || !Lengths.empty() || !LineRanges.empty())) {
427     errs() << "error: -offset, -length and -lines can only be used for "
428               "single file.\n";
429     return 1;
430   }
431   for (const auto &FileName : FileNames) {
432     if (Verbose)
433       errs() << "Formatting " << FileName << "\n";
434     Error |= clang::format::format(FileName);
435   }
436   return Error ? 1 : 0;
437 }
438