1 //===- MacroExpansionContext.cpp - Macro expansion information --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "clang/Analysis/MacroExpansionContext.h"
10 #include "llvm/Support/Debug.h"
11
12 #define DEBUG_TYPE "macro-expansion-context"
13
14 static void dumpTokenInto(const clang::Preprocessor &PP, clang::raw_ostream &OS,
15 clang::Token Tok);
16
17 namespace clang {
18 namespace detail {
19 class MacroExpansionRangeRecorder : public PPCallbacks {
20 const Preprocessor &PP;
21 SourceManager &SM;
22 MacroExpansionContext::ExpansionRangeMap &ExpansionRanges;
23
24 public:
MacroExpansionRangeRecorder(const Preprocessor & PP,SourceManager & SM,MacroExpansionContext::ExpansionRangeMap & ExpansionRanges)25 explicit MacroExpansionRangeRecorder(
26 const Preprocessor &PP, SourceManager &SM,
27 MacroExpansionContext::ExpansionRangeMap &ExpansionRanges)
28 : PP(PP), SM(SM), ExpansionRanges(ExpansionRanges) {}
29
MacroExpands(const Token & MacroName,const MacroDefinition & MD,SourceRange Range,const MacroArgs * Args)30 void MacroExpands(const Token &MacroName, const MacroDefinition &MD,
31 SourceRange Range, const MacroArgs *Args) override {
32 // Ignore annotation tokens like: _Pragma("pack(push, 1)")
33 if (MacroName.getIdentifierInfo()->getName() == "_Pragma")
34 return;
35
36 SourceLocation MacroNameBegin = SM.getExpansionLoc(MacroName.getLocation());
37 assert(MacroNameBegin == SM.getExpansionLoc(Range.getBegin()));
38
39 const SourceLocation ExpansionEnd = [Range, &SM = SM, &MacroName] {
40 // If the range is empty, use the length of the macro.
41 if (Range.getBegin() == Range.getEnd())
42 return SM.getExpansionLoc(
43 MacroName.getLocation().getLocWithOffset(MacroName.getLength()));
44
45 // Include the last character.
46 return SM.getExpansionLoc(Range.getEnd()).getLocWithOffset(1);
47 }();
48
49 (void)PP;
50 LLVM_DEBUG(llvm::dbgs() << "MacroExpands event: '";
51 dumpTokenInto(PP, llvm::dbgs(), MacroName);
52 llvm::dbgs()
53 << "' with length " << MacroName.getLength() << " at ";
54 MacroNameBegin.print(llvm::dbgs(), SM);
55 llvm::dbgs() << ", expansion end at ";
56 ExpansionEnd.print(llvm::dbgs(), SM); llvm::dbgs() << '\n';);
57
58 // If the expansion range is empty, use the identifier of the macro as a
59 // range.
60 MacroExpansionContext::ExpansionRangeMap::iterator It;
61 bool Inserted;
62 std::tie(It, Inserted) =
63 ExpansionRanges.try_emplace(MacroNameBegin, ExpansionEnd);
64 if (Inserted) {
65 LLVM_DEBUG(llvm::dbgs() << "maps ";
66 It->getFirst().print(llvm::dbgs(), SM); llvm::dbgs() << " to ";
67 It->getSecond().print(llvm::dbgs(), SM);
68 llvm::dbgs() << '\n';);
69 } else {
70 if (SM.isBeforeInTranslationUnit(It->getSecond(), ExpansionEnd)) {
71 It->getSecond() = ExpansionEnd;
72 LLVM_DEBUG(
73 llvm::dbgs() << "remaps "; It->getFirst().print(llvm::dbgs(), SM);
74 llvm::dbgs() << " to "; It->getSecond().print(llvm::dbgs(), SM);
75 llvm::dbgs() << '\n';);
76 }
77 }
78 }
79 };
80 } // namespace detail
81 } // namespace clang
82
83 using namespace clang;
84
MacroExpansionContext(const LangOptions & LangOpts)85 MacroExpansionContext::MacroExpansionContext(const LangOptions &LangOpts)
86 : LangOpts(LangOpts) {}
87
registerForPreprocessor(Preprocessor & NewPP)88 void MacroExpansionContext::registerForPreprocessor(Preprocessor &NewPP) {
89 PP = &NewPP;
90 SM = &NewPP.getSourceManager();
91
92 // Make sure that the Preprocessor does not outlive the MacroExpansionContext.
93 PP->addPPCallbacks(std::make_unique<detail::MacroExpansionRangeRecorder>(
94 *PP, *SM, ExpansionRanges));
95 // Same applies here.
96 PP->setTokenWatcher([this](const Token &Tok) { onTokenLexed(Tok); });
97 }
98
99 Optional<StringRef>
getExpandedText(SourceLocation MacroExpansionLoc) const100 MacroExpansionContext::getExpandedText(SourceLocation MacroExpansionLoc) const {
101 if (MacroExpansionLoc.isMacroID())
102 return llvm::None;
103
104 // If there was no macro expansion at that location, return None.
105 if (ExpansionRanges.find_as(MacroExpansionLoc) == ExpansionRanges.end())
106 return llvm::None;
107
108 // There was macro expansion, but resulted in no tokens, return empty string.
109 const auto It = ExpandedTokens.find_as(MacroExpansionLoc);
110 if (It == ExpandedTokens.end())
111 return StringRef{""};
112
113 // Otherwise we have the actual token sequence as string.
114 return It->getSecond().str();
115 }
116
117 Optional<StringRef>
getOriginalText(SourceLocation MacroExpansionLoc) const118 MacroExpansionContext::getOriginalText(SourceLocation MacroExpansionLoc) const {
119 if (MacroExpansionLoc.isMacroID())
120 return llvm::None;
121
122 const auto It = ExpansionRanges.find_as(MacroExpansionLoc);
123 if (It == ExpansionRanges.end())
124 return llvm::None;
125
126 assert(It->getFirst() != It->getSecond() &&
127 "Every macro expansion must cover a non-empty range.");
128
129 return Lexer::getSourceText(
130 CharSourceRange::getCharRange(It->getFirst(), It->getSecond()), *SM,
131 LangOpts);
132 }
133
dumpExpansionRanges() const134 void MacroExpansionContext::dumpExpansionRanges() const {
135 dumpExpansionRangesToStream(llvm::dbgs());
136 }
dumpExpandedTexts() const137 void MacroExpansionContext::dumpExpandedTexts() const {
138 dumpExpandedTextsToStream(llvm::dbgs());
139 }
140
dumpExpansionRangesToStream(raw_ostream & OS) const141 void MacroExpansionContext::dumpExpansionRangesToStream(raw_ostream &OS) const {
142 std::vector<std::pair<SourceLocation, SourceLocation>> LocalExpansionRanges;
143 LocalExpansionRanges.reserve(ExpansionRanges.size());
144 for (const auto &Record : ExpansionRanges)
145 LocalExpansionRanges.emplace_back(
146 std::make_pair(Record.getFirst(), Record.getSecond()));
147 llvm::sort(LocalExpansionRanges);
148
149 OS << "\n=============== ExpansionRanges ===============\n";
150 for (const auto &Record : LocalExpansionRanges) {
151 OS << "> ";
152 Record.first.print(OS, *SM);
153 OS << ", ";
154 Record.second.print(OS, *SM);
155 OS << '\n';
156 }
157 }
158
dumpExpandedTextsToStream(raw_ostream & OS) const159 void MacroExpansionContext::dumpExpandedTextsToStream(raw_ostream &OS) const {
160 std::vector<std::pair<SourceLocation, MacroExpansionText>>
161 LocalExpandedTokens;
162 LocalExpandedTokens.reserve(ExpandedTokens.size());
163 for (const auto &Record : ExpandedTokens)
164 LocalExpandedTokens.emplace_back(
165 std::make_pair(Record.getFirst(), Record.getSecond()));
166 llvm::sort(LocalExpandedTokens);
167
168 OS << "\n=============== ExpandedTokens ===============\n";
169 for (const auto &Record : LocalExpandedTokens) {
170 OS << "> ";
171 Record.first.print(OS, *SM);
172 OS << " -> '" << Record.second << "'\n";
173 }
174 }
175
dumpTokenInto(const Preprocessor & PP,raw_ostream & OS,Token Tok)176 static void dumpTokenInto(const Preprocessor &PP, raw_ostream &OS, Token Tok) {
177 assert(Tok.isNot(tok::raw_identifier));
178
179 // Ignore annotation tokens like: _Pragma("pack(push, 1)")
180 if (Tok.isAnnotation())
181 return;
182
183 if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
184 // FIXME: For now, we don't respect whitespaces between macro expanded
185 // tokens. We just emit a space after every identifier to produce a valid
186 // code for `int a ;` like expansions.
187 // ^-^-- Space after the 'int' and 'a' identifiers.
188 OS << II->getName() << ' ';
189 } else if (Tok.isLiteral() && !Tok.needsCleaning() && Tok.getLiteralData()) {
190 OS << StringRef(Tok.getLiteralData(), Tok.getLength());
191 } else {
192 char Tmp[256];
193 if (Tok.getLength() < sizeof(Tmp)) {
194 const char *TokPtr = Tmp;
195 // FIXME: Might use a different overload for cleaner callsite.
196 unsigned Len = PP.getSpelling(Tok, TokPtr);
197 OS.write(TokPtr, Len);
198 } else {
199 OS << "<too long token>";
200 }
201 }
202 }
203
onTokenLexed(const Token & Tok)204 void MacroExpansionContext::onTokenLexed(const Token &Tok) {
205 SourceLocation SLoc = Tok.getLocation();
206 if (SLoc.isFileID())
207 return;
208
209 LLVM_DEBUG(llvm::dbgs() << "lexed macro expansion token '";
210 dumpTokenInto(*PP, llvm::dbgs(), Tok); llvm::dbgs() << "' at ";
211 SLoc.print(llvm::dbgs(), *SM); llvm::dbgs() << '\n';);
212
213 // Remove spelling location.
214 SourceLocation CurrExpansionLoc = SM->getExpansionLoc(SLoc);
215
216 MacroExpansionText TokenAsString;
217 llvm::raw_svector_ostream OS(TokenAsString);
218
219 // FIXME: Prepend newlines and space to produce the exact same output as the
220 // preprocessor would for this token.
221
222 dumpTokenInto(*PP, OS, Tok);
223
224 ExpansionMap::iterator It;
225 bool Inserted;
226 std::tie(It, Inserted) =
227 ExpandedTokens.try_emplace(CurrExpansionLoc, std::move(TokenAsString));
228 if (!Inserted)
229 It->getSecond().append(TokenAsString);
230 }
231
232