1 //===--- NamespaceEndCommentsFixer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that
11 /// fixes namespace end comments.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "NamespaceEndCommentsFixer.h"
16 #include "llvm/Support/Debug.h"
17 #include "llvm/Support/Regex.h"
18 
19 #define DEBUG_TYPE "namespace-end-comments-fixer"
20 
21 namespace clang {
22 namespace format {
23 
24 namespace {
// Upper bound on the number of unwrapped lines a namespace may span while
// still being treated as "short". A short namespace gets no end comment.
constexpr int kShortNamespaceMaxLines = 1;
28 
29 // Computes the name of a namespace given the namespace token.
30 // Returns "" for anonymous namespace.
31 std::string computeName(const FormatToken *NamespaceTok) {
32   assert(NamespaceTok &&
33          NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
34          "expecting a namespace token");
35   std::string name = "";
36   const FormatToken *Tok = NamespaceTok->getNextNonComment();
37   if (NamespaceTok->is(TT_NamespaceMacro)) {
38     // Collects all the non-comment tokens between opening parenthesis
39     // and closing parenthesis or comma.
40     assert(Tok && Tok->is(tok::l_paren) && "expected an opening parenthesis");
41     Tok = Tok->getNextNonComment();
42     while (Tok && !Tok->isOneOf(tok::r_paren, tok::comma)) {
43       name += Tok->TokenText;
44       Tok = Tok->getNextNonComment();
45     }
46   } else {
47     // For `namespace [[foo]] A::B::inline C {` or
48     // `namespace MACRO1 MACRO2 A::B::inline C {`, returns "A::B::inline C".
49     // Peek for the first '::' (or '{') and then return all tokens from one
50     // token before that up until the '{'.
51     const FormatToken *FirstNSTok = Tok;
52     while (Tok && !Tok->is(tok::l_brace) && !Tok->is(tok::coloncolon)) {
53       FirstNSTok = Tok;
54       Tok = Tok->getNextNonComment();
55     }
56 
57     Tok = FirstNSTok;
58     while (Tok && !Tok->is(tok::l_brace)) {
59       name += Tok->TokenText;
60       if (Tok->is(tok::kw_inline))
61         name += " ";
62       Tok = Tok->getNextNonComment();
63     }
64   }
65   return name;
66 }
67 
68 std::string computeEndCommentText(StringRef NamespaceName, bool AddNewline,
69                                   const FormatToken *NamespaceTok) {
70   std::string text = "// ";
71   text += NamespaceTok->TokenText;
72   if (NamespaceTok->is(TT_NamespaceMacro))
73     text += "(";
74   else if (!NamespaceName.empty())
75     text += ' ';
76   text += NamespaceName;
77   if (NamespaceTok->is(TT_NamespaceMacro))
78     text += ")";
79   if (AddNewline)
80     text += '\n';
81   return text;
82 }
83 
84 bool hasEndComment(const FormatToken *RBraceTok) {
85   return RBraceTok->Next && RBraceTok->Next->is(tok::comment);
86 }
87 
88 bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName,
89                      const FormatToken *NamespaceTok) {
90   assert(hasEndComment(RBraceTok));
91   const FormatToken *Comment = RBraceTok->Next;
92 
93   // Matches a valid namespace end comment.
94   // Valid namespace end comments don't need to be edited.
95   static const llvm::Regex NamespaceCommentPattern =
96       llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
97                   "namespace( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$",
98                   llvm::Regex::IgnoreCase);
99   static const llvm::Regex NamespaceMacroCommentPattern =
100       llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
101                   "([a-zA-Z0-9_]+)\\(([a-zA-Z0-9:_]*)\\)\\.? *(\\*/)?$",
102                   llvm::Regex::IgnoreCase);
103 
104   SmallVector<StringRef, 8> Groups;
105   if (NamespaceTok->is(TT_NamespaceMacro) &&
106       NamespaceMacroCommentPattern.match(Comment->TokenText, &Groups)) {
107     StringRef NamespaceTokenText = Groups.size() > 4 ? Groups[4] : "";
108     // The name of the macro must be used.
109     if (NamespaceTokenText != NamespaceTok->TokenText)
110       return false;
111   } else if (NamespaceTok->isNot(tok::kw_namespace) ||
112              !NamespaceCommentPattern.match(Comment->TokenText, &Groups)) {
113     // Comment does not match regex.
114     return false;
115   }
116   StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5] : "";
117   // Anonymous namespace comments must not mention a namespace name.
118   if (NamespaceName.empty() && !NamespaceNameInComment.empty())
119     return false;
120   StringRef AnonymousInComment = Groups.size() > 3 ? Groups[3] : "";
121   // Named namespace comments must not mention anonymous namespace.
122   if (!NamespaceName.empty() && !AnonymousInComment.empty())
123     return false;
124   return NamespaceNameInComment == NamespaceName;
125 }
126 
127 void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
128                    const SourceManager &SourceMgr,
129                    tooling::Replacements *Fixes) {
130   auto EndLoc = RBraceTok->Tok.getEndLoc();
131   auto Range = CharSourceRange::getCharRange(EndLoc, EndLoc);
132   auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
133   if (Err) {
134     llvm::errs() << "Error while adding namespace end comment: "
135                  << llvm::toString(std::move(Err)) << "\n";
136   }
137 }
138 
139 void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
140                       const SourceManager &SourceMgr,
141                       tooling::Replacements *Fixes) {
142   assert(hasEndComment(RBraceTok));
143   const FormatToken *Comment = RBraceTok->Next;
144   auto Range = CharSourceRange::getCharRange(Comment->getStartOfNonWhitespace(),
145                                              Comment->Tok.getEndLoc());
146   auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
147   if (Err) {
148     llvm::errs() << "Error while updating namespace end comment: "
149                  << llvm::toString(std::move(Err)) << "\n";
150   }
151 }
152 } // namespace
153 
154 const FormatToken *
155 getNamespaceToken(const AnnotatedLine *Line,
156                   const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
157   if (!Line->Affected || Line->InPPDirective || !Line->startsWith(tok::r_brace))
158     return nullptr;
159   size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex;
160   if (StartLineIndex == UnwrappedLine::kInvalidIndex)
161     return nullptr;
162   assert(StartLineIndex < AnnotatedLines.size());
163   const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First;
164   if (NamespaceTok->is(tok::l_brace)) {
165     // "namespace" keyword can be on the line preceding '{', e.g. in styles
166     // where BraceWrapping.AfterNamespace is true.
167     if (StartLineIndex > 0)
168       NamespaceTok = AnnotatedLines[StartLineIndex - 1]->First;
169   }
170   return NamespaceTok->getNamespaceToken();
171 }
172 
173 StringRef
174 getNamespaceTokenText(const AnnotatedLine *Line,
175                       const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
176   const FormatToken *NamespaceTok = getNamespaceToken(Line, AnnotatedLines);
177   return NamespaceTok ? NamespaceTok->TokenText : StringRef();
178 }
179 
180 NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env,
181                                                      const FormatStyle &Style)
182     : TokenAnalyzer(Env, Style) {}
183 
184 std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze(
185     TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
186     FormatTokenLexer &Tokens) {
187   const SourceManager &SourceMgr = Env.getSourceManager();
188   AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
189   tooling::Replacements Fixes;
190   std::string AllNamespaceNames = "";
191   size_t StartLineIndex = SIZE_MAX;
192   StringRef NamespaceTokenText;
193   unsigned int CompactedNamespacesCount = 0;
194   for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
195     const AnnotatedLine *EndLine = AnnotatedLines[I];
196     const FormatToken *NamespaceTok =
197         getNamespaceToken(EndLine, AnnotatedLines);
198     if (!NamespaceTok)
199       continue;
200     FormatToken *RBraceTok = EndLine->First;
201     if (RBraceTok->Finalized)
202       continue;
203     RBraceTok->Finalized = true;
204     const FormatToken *EndCommentPrevTok = RBraceTok;
205     // Namespaces often end with '};'. In that case, attach namespace end
206     // comments to the semicolon tokens.
207     if (RBraceTok->Next && RBraceTok->Next->is(tok::semi)) {
208       EndCommentPrevTok = RBraceTok->Next;
209     }
210     if (StartLineIndex == SIZE_MAX)
211       StartLineIndex = EndLine->MatchingOpeningBlockLineIndex;
212     std::string NamespaceName = computeName(NamespaceTok);
213     if (Style.CompactNamespaces) {
214       if (CompactedNamespacesCount == 0)
215         NamespaceTokenText = NamespaceTok->TokenText;
216       if ((I + 1 < E) &&
217           NamespaceTokenText ==
218               getNamespaceTokenText(AnnotatedLines[I + 1], AnnotatedLines) &&
219           StartLineIndex - CompactedNamespacesCount - 1 ==
220               AnnotatedLines[I + 1]->MatchingOpeningBlockLineIndex &&
221           !AnnotatedLines[I + 1]->First->Finalized) {
222         if (hasEndComment(EndCommentPrevTok)) {
223           // remove end comment, it will be merged in next one
224           updateEndComment(EndCommentPrevTok, std::string(), SourceMgr, &Fixes);
225         }
226         CompactedNamespacesCount++;
227         AllNamespaceNames = "::" + NamespaceName + AllNamespaceNames;
228         continue;
229       }
230       NamespaceName += AllNamespaceNames;
231       CompactedNamespacesCount = 0;
232       AllNamespaceNames = std::string();
233     }
234     // The next token in the token stream after the place where the end comment
235     // token must be. This is either the next token on the current line or the
236     // first token on the next line.
237     const FormatToken *EndCommentNextTok = EndCommentPrevTok->Next;
238     if (EndCommentNextTok && EndCommentNextTok->is(tok::comment))
239       EndCommentNextTok = EndCommentNextTok->Next;
240     if (!EndCommentNextTok && I + 1 < E)
241       EndCommentNextTok = AnnotatedLines[I + 1]->First;
242     bool AddNewline = EndCommentNextTok &&
243                       EndCommentNextTok->NewlinesBefore == 0 &&
244                       EndCommentNextTok->isNot(tok::eof);
245     const std::string EndCommentText =
246         computeEndCommentText(NamespaceName, AddNewline, NamespaceTok);
247     if (!hasEndComment(EndCommentPrevTok)) {
248       bool isShort = I - StartLineIndex <= kShortNamespaceMaxLines + 1;
249       if (!isShort)
250         addEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
251     } else if (!validEndComment(EndCommentPrevTok, NamespaceName,
252                                 NamespaceTok)) {
253       updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
254     }
255     StartLineIndex = SIZE_MAX;
256   }
257   return {Fixes, 0};
258 }
259 
260 } // namespace format
261 } // namespace clang
262