1 //===--- NamespaceEndCommentsFixer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that
11 /// fixes namespace end comments.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "NamespaceEndCommentsFixer.h"
16 #include "clang/Basic/TokenKinds.h"
17 #include "llvm/Support/Debug.h"
18 #include "llvm/Support/Regex.h"
19 
20 #define DEBUG_TYPE "namespace-end-comments-fixer"
21 
22 namespace clang {
23 namespace format {
24 
25 namespace {
26 // Iterates all tokens starting from StartTok to EndTok and apply Fn to all
27 // tokens between them including StartTok and EndTok. Returns the token after
28 // EndTok.
29 const FormatToken *
30 processTokens(const FormatToken *Tok, tok::TokenKind StartTok,
31               tok::TokenKind EndTok,
32               llvm::function_ref<void(const FormatToken *)> Fn) {
33   if (!Tok || Tok->isNot(StartTok))
34     return Tok;
35   int NestLevel = 0;
36   do {
37     if (Tok->is(StartTok))
38       ++NestLevel;
39     else if (Tok->is(EndTok))
40       --NestLevel;
41     if (Fn)
42       Fn(Tok);
43     Tok = Tok->getNextNonComment();
44   } while (Tok && NestLevel > 0);
45   return Tok;
46 }
47 
48 const FormatToken *skipAttribute(const FormatToken *Tok) {
49   if (!Tok)
50     return nullptr;
51   if (Tok->is(tok::kw___attribute)) {
52     Tok = Tok->getNextNonComment();
53     Tok = processTokens(Tok, tok::l_paren, tok::r_paren, nullptr);
54   } else if (Tok->is(tok::l_square)) {
55     Tok = processTokens(Tok, tok::l_square, tok::r_square, nullptr);
56   }
57   return Tok;
58 }
59 
// Computes the name of a namespace given the namespace token.
// Returns "" for anonymous namespace.
//
// NamespaceTok must be either the `namespace` keyword or a token annotated as
// TT_NamespaceMacro (asserted below). Comment tokens are skipped throughout,
// since every advance goes through getNextNonComment().
//
// For a namespace macro the result is the concatenated text of the tokens that
// form the first macro argument. For a regular namespace the result is built
// in two passes: the first one records the tokens that precede the token
// directly in front of the first '::', '{' or '('; the second one assembles the
// (possibly nested / inline) name starting from that token.
std::string computeName(const FormatToken *NamespaceTok) {
  assert(NamespaceTok &&
         NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
         "expecting a namespace token");
  std::string name;
  const FormatToken *Tok = NamespaceTok->getNextNonComment();
  if (NamespaceTok->is(TT_NamespaceMacro)) {
    // Collects all the non-comment tokens between opening parenthesis
    // and closing parenthesis or comma.
    assert(Tok && Tok->is(tok::l_paren) && "expected an opening parenthesis");
    Tok = Tok->getNextNonComment();
    while (Tok && !Tok->isOneOf(tok::r_paren, tok::comma)) {
      name += Tok->TokenText;
      Tok = Tok->getNextNonComment();
    }
    return name;
  }
  // An attribute may directly follow the `namespace` keyword.
  Tok = skipAttribute(Tok);

  // Text of the tokens seen in the first pass, excluding the last one (which
  // becomes the starting point of the second pass).
  std::string FirstNSName;
  // For `namespace [[foo]] A::B::inline C {` or
  // `namespace MACRO1 MACRO2 A::B::inline C {`, returns "A::B::inline C".
  // Peek for the first '::' (or '{' or '(')) and then return all tokens from
  // one token before that up until the '{'. A '(' might be a macro with
  // arguments.
  const FormatToken *FirstNSTok = nullptr;
  while (Tok && !Tok->isOneOf(tok::l_brace, tok::coloncolon, tok::l_paren)) {
    // FirstNSTok trails Tok by one token, so the token right before the
    // terminating '::', '{' or '(' is never appended to FirstNSName.
    if (FirstNSTok)
      FirstNSName += FirstNSTok->TokenText;
    FirstNSTok = Tok;
    Tok = Tok->getNextNonComment();
  }

  // Rewind to the token in front of the terminator (if any token was seen) and
  // skip an attribute that may start there.
  if (FirstNSTok)
    Tok = FirstNSTok;
  Tok = skipAttribute(Tok);

  // From here on FirstNSTok is reused as "the previous token" of the second
  // pass; it is appended to `name` one iteration late.
  FirstNSTok = nullptr;
  // Add everything from '(' to ')'.
  auto AddToken = [&name](const FormatToken *Tok) { name += Tok->TokenText; };
  bool IsPrevColoncolon = false; // The last appended token was '::'.
  bool HasColoncolon = false;    // At least one '::' has been appended.
  bool IsPrevInline = false;     // Set once an `inline` keyword was appended;
                                 // it is not reset afterwards.
  bool NameFinished = false;     // The loop stopped at the end of the name, so
                                 // the pending token must not be appended.
  // If we found '::' in name, then it's the name. Otherwise, we can't tell
  // which one is name. For example, `namespace A B {`.
  while (Tok && Tok->isNot(tok::l_brace)) {
    if (FirstNSTok) {
      // A '::' was seen, the previous appended token was not '::', and no
      // `inline` has been appended: the pending token either continues the
      // name ('::' or a parenthesized argument list) or ends it.
      if (!IsPrevInline && HasColoncolon && !IsPrevColoncolon) {
        if (FirstNSTok->is(tok::l_paren)) {
          // Append the whole parenthesized group and resume after it.
          FirstNSTok = Tok =
              processTokens(FirstNSTok, tok::l_paren, tok::r_paren, AddToken);
          continue;
        }
        if (FirstNSTok->isNot(tok::coloncolon)) {
          NameFinished = true;
          break;
        }
      }
      name += FirstNSTok->TokenText;
      IsPrevColoncolon = FirstNSTok->is(tok::coloncolon);
      HasColoncolon = HasColoncolon || IsPrevColoncolon;
      if (FirstNSTok->is(tok::kw_inline)) {
        // Keep `inline` separated from the identifier that follows it.
        name += " ";
        IsPrevInline = true;
      }
    }
    FirstNSTok = Tok;
    Tok = Tok->getNextNonComment();
    // Attributes inside the name are dropped: if one starts here, continue
    // with the token that follows it.
    const FormatToken *TokAfterAttr = skipAttribute(Tok);
    if (TokAfterAttr != Tok)
      FirstNSTok = Tok = TokAfterAttr;
  }
  // Flush the pending token unless the name was already complete.
  if (!NameFinished && FirstNSTok && FirstNSTok->isNot(tok::l_brace))
    name += FirstNSTok->TokenText;
  // With a '::' the second pass alone is the name; otherwise the tokens
  // collected in the first pass are put in front of it, separated by a space.
  if (FirstNSName.empty() || HasColoncolon)
    return name;
  return name.empty() ? FirstNSName : FirstNSName + " " + name;
}
141 
142 std::string computeEndCommentText(StringRef NamespaceName, bool AddNewline,
143                                   const FormatToken *NamespaceTok,
144                                   unsigned SpacesToAdd) {
145   std::string text = "//";
146   text.append(SpacesToAdd, ' ');
147   text += NamespaceTok->TokenText;
148   if (NamespaceTok->is(TT_NamespaceMacro))
149     text += "(";
150   else if (!NamespaceName.empty())
151     text += ' ';
152   text += NamespaceName;
153   if (NamespaceTok->is(TT_NamespaceMacro))
154     text += ")";
155   if (AddNewline)
156     text += '\n';
157   return text;
158 }
159 
160 bool hasEndComment(const FormatToken *RBraceTok) {
161   return RBraceTok->Next && RBraceTok->Next->is(tok::comment);
162 }
163 
164 bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName,
165                      const FormatToken *NamespaceTok) {
166   assert(hasEndComment(RBraceTok));
167   const FormatToken *Comment = RBraceTok->Next;
168 
169   // Matches a valid namespace end comment.
170   // Valid namespace end comments don't need to be edited.
171   static const llvm::Regex NamespaceCommentPattern =
172       llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
173                   "namespace( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$",
174                   llvm::Regex::IgnoreCase);
175   static const llvm::Regex NamespaceMacroCommentPattern =
176       llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
177                   "([a-zA-Z0-9_]+)\\(([a-zA-Z0-9:_]*)\\)\\.? *(\\*/)?$",
178                   llvm::Regex::IgnoreCase);
179 
180   SmallVector<StringRef, 8> Groups;
181   if (NamespaceTok->is(TT_NamespaceMacro) &&
182       NamespaceMacroCommentPattern.match(Comment->TokenText, &Groups)) {
183     StringRef NamespaceTokenText = Groups.size() > 4 ? Groups[4] : "";
184     // The name of the macro must be used.
185     if (NamespaceTokenText != NamespaceTok->TokenText)
186       return false;
187   } else if (NamespaceTok->isNot(tok::kw_namespace) ||
188              !NamespaceCommentPattern.match(Comment->TokenText, &Groups)) {
189     // Comment does not match regex.
190     return false;
191   }
192   StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5] : "";
193   // Anonymous namespace comments must not mention a namespace name.
194   if (NamespaceName.empty() && !NamespaceNameInComment.empty())
195     return false;
196   StringRef AnonymousInComment = Groups.size() > 3 ? Groups[3] : "";
197   // Named namespace comments must not mention anonymous namespace.
198   if (!NamespaceName.empty() && !AnonymousInComment.empty())
199     return false;
200   if (NamespaceNameInComment == NamespaceName)
201     return true;
202 
203   // Has namespace comment flowed onto the next line.
204   // } // namespace
205   //   // verylongnamespacenamethatdidnotfitonthepreviouscommentline
206   if (!(Comment->Next && Comment->Next->is(TT_LineComment)))
207     return false;
208 
209   static const llvm::Regex CommentPattern = llvm::Regex(
210       "^/[/*] *( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$", llvm::Regex::IgnoreCase);
211 
212   // Pull out just the comment text.
213   if (!CommentPattern.match(Comment->Next->TokenText, &Groups))
214     return false;
215   NamespaceNameInComment = Groups.size() > 2 ? Groups[2] : "";
216 
217   return NamespaceNameInComment == NamespaceName;
218 }
219 
220 void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
221                    const SourceManager &SourceMgr,
222                    tooling::Replacements *Fixes) {
223   auto EndLoc = RBraceTok->Tok.getEndLoc();
224   auto Range = CharSourceRange::getCharRange(EndLoc, EndLoc);
225   auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
226   if (Err) {
227     llvm::errs() << "Error while adding namespace end comment: "
228                  << llvm::toString(std::move(Err)) << "\n";
229   }
230 }
231 
232 void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
233                       const SourceManager &SourceMgr,
234                       tooling::Replacements *Fixes) {
235   assert(hasEndComment(RBraceTok));
236   const FormatToken *Comment = RBraceTok->Next;
237   auto Range = CharSourceRange::getCharRange(Comment->getStartOfNonWhitespace(),
238                                              Comment->Tok.getEndLoc());
239   auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
240   if (Err) {
241     llvm::errs() << "Error while updating namespace end comment: "
242                  << llvm::toString(std::move(Err)) << "\n";
243   }
244 }
245 } // namespace
246 
247 const FormatToken *
248 getNamespaceToken(const AnnotatedLine *Line,
249                   const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
250   if (!Line->Affected || Line->InPPDirective || !Line->startsWith(tok::r_brace))
251     return nullptr;
252   size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex;
253   if (StartLineIndex == UnwrappedLine::kInvalidIndex)
254     return nullptr;
255   assert(StartLineIndex < AnnotatedLines.size());
256   const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First;
257   if (NamespaceTok->is(tok::l_brace)) {
258     // "namespace" keyword can be on the line preceding '{', e.g. in styles
259     // where BraceWrapping.AfterNamespace is true.
260     if (StartLineIndex > 0) {
261       NamespaceTok = AnnotatedLines[StartLineIndex - 1]->First;
262       if (AnnotatedLines[StartLineIndex - 1]->endsWith(tok::semi))
263         return nullptr;
264     }
265   }
266 
267   return NamespaceTok->getNamespaceToken();
268 }
269 
270 StringRef
271 getNamespaceTokenText(const AnnotatedLine *Line,
272                       const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
273   const FormatToken *NamespaceTok = getNamespaceToken(Line, AnnotatedLines);
274   return NamespaceTok ? NamespaceTok->TokenText : StringRef();
275 }
276 
// Forwards the environment and the style to the TokenAnalyzer base class; the
// fixer itself adds no state here.
NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env,
                                                     const FormatStyle &Style)
    : TokenAnalyzer(Env, Style) {}
280 
// Computes the replacements that add missing namespace end comments and fix
// existing ones that do not match their namespace.
//
// Returns the collected replacements; the second member of the pair is always
// 0. Annotator and Tokens are not used by this implementation. As a side
// effect, the first token of every processed closing-brace line is marked
// Finalized.
std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze(
    TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
    FormatTokenLexer &Tokens) {
  const SourceManager &SourceMgr = Env.getSourceManager();
  AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
  tooling::Replacements Fixes;

  // Spin through the lines and ensure we have balanced braces.
  int Braces = 0;
  for (AnnotatedLine *Line : AnnotatedLines) {
    FormatToken *Tok = Line->First;
    while (Tok) {
      Braces += Tok->is(tok::l_brace) ? 1 : Tok->is(tok::r_brace) ? -1 : 0;
      Tok = Tok->Next;
    }
  }
  // Don't attempt to comment unbalanced braces or this can
  // lead to comments being placed on the closing brace which isn't
  // the matching brace of the namespace. (occurs during incomplete editing).
  if (Braces != 0)
    return {Fixes, 0};

  // State carried across iterations while a run of compacted namespaces
  // (Style.CompactNamespaces) is being collected:
  //  - AllNamespaceNames: "::"-prefixed names of the already visited inner
  //    namespaces, to be appended to the name of the outermost one.
  //  - StartLineIndex: opening line index of the first (innermost) namespace
  //    of the current run; SIZE_MAX means "no run in progress".
  //  - NamespaceTokenText: text of the namespace token that started the run.
  //  - CompactedNamespacesCount: number of inner namespaces merged so far.
  std::string AllNamespaceNames;
  size_t StartLineIndex = SIZE_MAX;
  StringRef NamespaceTokenText;
  unsigned int CompactedNamespacesCount = 0;
  for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
    const AnnotatedLine *EndLine = AnnotatedLines[I];
    const FormatToken *NamespaceTok =
        getNamespaceToken(EndLine, AnnotatedLines);
    // Not the closing line of a namespace.
    if (!NamespaceTok)
      continue;
    FormatToken *RBraceTok = EndLine->First;
    // Skip closing braces that are already finalized; mark the others so that
    // they are handled only once.
    if (RBraceTok->Finalized)
      continue;
    RBraceTok->Finalized = true;
    const FormatToken *EndCommentPrevTok = RBraceTok;
    // Namespaces often end with '};'. In that case, attach namespace end
    // comments to the semicolon tokens.
    if (RBraceTok->Next && RBraceTok->Next->is(tok::semi))
      EndCommentPrevTok = RBraceTok->Next;
    if (StartLineIndex == SIZE_MAX)
      StartLineIndex = EndLine->MatchingOpeningBlockLineIndex;
    std::string NamespaceName = computeName(NamespaceTok);
    if (Style.CompactNamespaces) {
      if (CompactedNamespacesCount == 0)
        NamespaceTokenText = NamespaceTok->TokenText;
      // The next line closes an enclosing namespace that uses the same
      // namespace token text, was opened on the line directly preceding the
      // ones already merged, and has not been finalized: merge this namespace
      // into the comment of that enclosing one.
      if ((I + 1 < E) &&
          NamespaceTokenText ==
              getNamespaceTokenText(AnnotatedLines[I + 1], AnnotatedLines) &&
          StartLineIndex - CompactedNamespacesCount - 1 ==
              AnnotatedLines[I + 1]->MatchingOpeningBlockLineIndex &&
          !AnnotatedLines[I + 1]->First->Finalized) {
        if (hasEndComment(EndCommentPrevTok)) {
          // remove end comment, it will be merged in next one
          updateEndComment(EndCommentPrevTok, std::string(), SourceMgr, &Fixes);
        }
        ++CompactedNamespacesCount;
        // Names are prepended because inner namespaces are visited first.
        if (!NamespaceName.empty())
          AllNamespaceNames = "::" + NamespaceName + AllNamespaceNames;
        continue;
      }
      // Outermost namespace of the run: build the full name and reset the
      // compaction state.
      NamespaceName += AllNamespaceNames;
      CompactedNamespacesCount = 0;
      AllNamespaceNames = std::string();
    }
    // The next token in the token stream after the place where the end comment
    // token must be. This is either the next token on the current line or the
    // first token on the next line.
    const FormatToken *EndCommentNextTok = EndCommentPrevTok->Next;
    if (EndCommentNextTok && EndCommentNextTok->is(tok::comment))
      EndCommentNextTok = EndCommentNextTok->Next;
    if (!EndCommentNextTok && I + 1 < E)
      EndCommentNextTok = AnnotatedLines[I + 1]->First;
    // A line comment would swallow a following token that is not preceded by
    // a newline, so terminate the comment with one in that case (unless the
    // following token is the end of file).
    bool AddNewline = EndCommentNextTok &&
                      EndCommentNextTok->NewlinesBefore == 0 &&
                      EndCommentNextTok->isNot(tok::eof);
    const std::string EndCommentText =
        computeEndCommentText(NamespaceName, AddNewline, NamespaceTok,
                              Style.SpacesInLineCommentPrefix.Minimum);
    if (!hasEndComment(EndCommentPrevTok)) {
      // No comment is added to namespaces that span at most
      // Style.ShortNamespaceLines + 1 annotated lines between the opening line
      // and this closing line.
      bool isShort = I - StartLineIndex <= Style.ShortNamespaceLines + 1;
      if (!isShort) {
        addEndComment(EndCommentPrevTok,
                      std::string(Style.SpacesBeforeTrailingComments, ' ') +
                          EndCommentText,
                      SourceMgr, &Fixes);
      }
    } else if (!validEndComment(EndCommentPrevTok, NamespaceName,
                                NamespaceTok)) {
      // An existing but wrong comment is rewritten regardless of the length of
      // the namespace.
      updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
    }
    // This namespace (or run of compacted namespaces) is done.
    StartLineIndex = SIZE_MAX;
  }
  return {Fixes, 0};
}
377 
378 } // namespace format
379 } // namespace clang
380