1 //===--- RawCommentList.h - Classes for processing raw comments -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_CLANG_AST_RAWCOMMENTLIST_H
10 #define LLVM_CLANG_AST_RAWCOMMENTLIST_H
11 
12 #include "clang/Basic/CommentOptions.h"
13 #include "clang/Basic/SourceLocation.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/Support/Allocator.h"
17 #include <map>
18 
19 namespace clang {
20 
21 class ASTContext;
22 class ASTReader;
23 class Decl;
24 class DiagnosticsEngine;
25 class Preprocessor;
26 class SourceManager;
27 
28 namespace comments {
29   class FullComment;
30 } // end namespace comments
31 
32 class RawComment {
33 public:
34   enum CommentKind {
35     RCK_Invalid,      ///< Invalid comment
36     RCK_OrdinaryBCPL, ///< Any normal BCPL comments
37     RCK_OrdinaryC,    ///< Any normal C comment
38     RCK_BCPLSlash,    ///< \code /// stuff \endcode
39     RCK_BCPLExcl,     ///< \code //! stuff \endcode
40     RCK_JavaDoc,      ///< \code /** stuff */ \endcode
41     RCK_Qt,           ///< \code /*! stuff */ \endcode, also used by HeaderDoc
42     RCK_Merged        ///< Two or more documentation comments merged together
43   };
44 
45   RawComment() : Kind(RCK_Invalid), IsAlmostTrailingComment(false) { }
46 
47   RawComment(const SourceManager &SourceMgr, SourceRange SR,
48              const CommentOptions &CommentOpts, bool Merged);
49 
50   CommentKind getKind() const LLVM_READONLY {
51     return (CommentKind) Kind;
52   }
53 
54   bool isInvalid() const LLVM_READONLY {
55     return Kind == RCK_Invalid;
56   }
57 
58   bool isMerged() const LLVM_READONLY {
59     return Kind == RCK_Merged;
60   }
61 
62   /// Is this comment attached to any declaration?
63   bool isAttached() const LLVM_READONLY {
64     return IsAttached;
65   }
66 
67   void setAttached() {
68     IsAttached = true;
69   }
70 
71   /// Returns true if it is a comment that should be put after a member:
72   /// \code ///< stuff \endcode
73   /// \code //!< stuff \endcode
74   /// \code /**< stuff */ \endcode
75   /// \code /*!< stuff */ \endcode
76   bool isTrailingComment() const LLVM_READONLY {
77     return IsTrailingComment;
78   }
79 
80   /// Returns true if it is a probable typo:
81   /// \code //< stuff \endcode
82   /// \code /*< stuff */ \endcode
83   bool isAlmostTrailingComment() const LLVM_READONLY {
84     return IsAlmostTrailingComment;
85   }
86 
87   /// Returns true if this comment is not a documentation comment.
88   bool isOrdinary() const LLVM_READONLY {
89     return ((Kind == RCK_OrdinaryBCPL) || (Kind == RCK_OrdinaryC));
90   }
91 
92   /// Returns true if this comment any kind of a documentation comment.
93   bool isDocumentation() const LLVM_READONLY {
94     return !isInvalid() && !isOrdinary();
95   }
96 
97   /// Returns raw comment text with comment markers.
98   StringRef getRawText(const SourceManager &SourceMgr) const {
99     if (RawTextValid)
100       return RawText;
101 
102     RawText = getRawTextSlow(SourceMgr);
103     RawTextValid = true;
104     return RawText;
105   }
106 
107   SourceRange getSourceRange() const LLVM_READONLY { return Range; }
108   SourceLocation getBeginLoc() const LLVM_READONLY { return Range.getBegin(); }
109   SourceLocation getEndLoc() const LLVM_READONLY { return Range.getEnd(); }
110 
111   const char *getBriefText(const ASTContext &Context) const {
112     if (BriefTextValid)
113       return BriefText;
114 
115     return extractBriefText(Context);
116   }
117 
118   bool hasUnsupportedSplice(const SourceManager &SourceMgr) const {
119     if (!isInvalid())
120       return false;
121     StringRef Text = getRawText(SourceMgr);
122     if (Text.size() < 6 || Text[0] != '/')
123       return false;
124     if (Text[1] == '*')
125       return Text[Text.size() - 1] != '/' || Text[Text.size() - 2] != '*';
126     return Text[1] != '/';
127   }
128 
129   /// Returns sanitized comment text, suitable for presentation in editor UIs.
130   /// E.g. will transform:
131   ///     // This is a long multiline comment.
132   ///     //   Parts of it  might be indented.
133   ///     /* The comments styles might be mixed. */
134   ///  into
135   ///     "This is a long multiline comment.\n"
136   ///     "  Parts of it  might be indented.\n"
137   ///     "The comments styles might be mixed."
138   /// Also removes leading indentation and sanitizes some common cases:
139   ///     /* This is a first line.
140   ///      *   This is a second line. It is indented.
141   ///      * This is a third line. */
142   /// and
143   ///     /* This is a first line.
144   ///          This is a second line. It is indented.
145   ///     This is a third line. */
146   /// will both turn into:
147   ///     "This is a first line.\n"
148   ///     "  This is a second line. It is indented.\n"
149   ///     "This is a third line."
150   std::string getFormattedText(const SourceManager &SourceMgr,
151                                DiagnosticsEngine &Diags) const;
152 
153   struct CommentLine {
154     std::string Text;
155     PresumedLoc Begin;
156     PresumedLoc End;
157 
158     CommentLine(StringRef Text, PresumedLoc Begin, PresumedLoc End)
159         : Text(Text), Begin(Begin), End(End) {}
160   };
161 
162   /// Returns sanitized comment text as separated lines with locations in
163   /// source, suitable for further processing and rendering requiring source
164   /// locations.
165   std::vector<CommentLine> getFormattedLines(const SourceManager &SourceMgr,
166                                              DiagnosticsEngine &Diags) const;
167 
168   /// Parse the comment, assuming it is attached to decl \c D.
169   comments::FullComment *parse(const ASTContext &Context,
170                                const Preprocessor *PP, const Decl *D) const;
171 
172 private:
173   SourceRange Range;
174 
175   mutable StringRef RawText;
176   mutable const char *BriefText = nullptr;
177 
178   mutable bool RawTextValid : 1;   ///< True if RawText is valid
179   mutable bool BriefTextValid : 1; ///< True if BriefText is valid
180 
181   unsigned Kind : 3;
182 
183   /// True if comment is attached to a declaration in ASTContext.
184   bool IsAttached : 1;
185 
186   bool IsTrailingComment : 1;
187   bool IsAlmostTrailingComment : 1;
188 
189   /// Constructor for AST deserialization.
190   RawComment(SourceRange SR, CommentKind K, bool IsTrailingComment,
191              bool IsAlmostTrailingComment) :
192     Range(SR), RawTextValid(false), BriefTextValid(false), Kind(K),
193     IsAttached(false), IsTrailingComment(IsTrailingComment),
194     IsAlmostTrailingComment(IsAlmostTrailingComment)
195   { }
196 
197   StringRef getRawTextSlow(const SourceManager &SourceMgr) const;
198 
199   const char *extractBriefText(const ASTContext &Context) const;
200 
201   friend class ASTReader;
202 };
203 
204 /// This class represents all comments included in the translation unit,
205 /// sorted in order of appearance in the translation unit.
206 class RawCommentList {
207 public:
208   RawCommentList(SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
209 
210   void addComment(const RawComment &RC, const CommentOptions &CommentOpts,
211                   llvm::BumpPtrAllocator &Allocator);
212 
213   /// \returns A mapping from an offset of the start of the comment to the
214   /// comment itself, or nullptr in case there are no comments in \p File.
215   const std::map<unsigned, RawComment *> *getCommentsInFile(FileID File) const;
216 
217   bool empty() const;
218 
219   unsigned getCommentBeginLine(RawComment *C, FileID File,
220                                unsigned Offset) const;
221   unsigned getCommentEndOffset(RawComment *C) const;
222 
223 private:
224   SourceManager &SourceMgr;
225   // mapping: FileId -> comment begin offset -> comment
226   llvm::DenseMap<FileID, std::map<unsigned, RawComment *>> OrderedComments;
227   mutable llvm::DenseMap<RawComment *, unsigned> CommentBeginLine;
228   mutable llvm::DenseMap<RawComment *, unsigned> CommentEndOffset;
229 
230   friend class ASTReader;
231   friend class ASTWriter;
232 };
233 
234 } // end namespace clang
235 
236 #endif
237