1 //===- MacroExpansionContext.cpp - Macro expansion information --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Analysis/MacroExpansionContext.h"
10 #include "llvm/Support/Debug.h"
11 #include <optional>
12 
13 #define DEBUG_TYPE "macro-expansion-context"
14 
15 static void dumpTokenInto(const clang::Preprocessor &PP, clang::raw_ostream &OS,
16                           clang::Token Tok);
17 
18 namespace clang {
19 namespace detail {
20 class MacroExpansionRangeRecorder : public PPCallbacks {
21   const Preprocessor &PP;
22   SourceManager &SM;
23   MacroExpansionContext::ExpansionRangeMap &ExpansionRanges;
24 
25 public:
26   explicit MacroExpansionRangeRecorder(
27       const Preprocessor &PP, SourceManager &SM,
28       MacroExpansionContext::ExpansionRangeMap &ExpansionRanges)
29       : PP(PP), SM(SM), ExpansionRanges(ExpansionRanges) {}
30 
31   void MacroExpands(const Token &MacroName, const MacroDefinition &MD,
32                     SourceRange Range, const MacroArgs *Args) override {
33     // Ignore annotation tokens like: _Pragma("pack(push, 1)")
34     if (MacroName.getIdentifierInfo()->getName() == "_Pragma")
35       return;
36 
37     SourceLocation MacroNameBegin = SM.getExpansionLoc(MacroName.getLocation());
38     assert(MacroNameBegin == SM.getExpansionLoc(Range.getBegin()));
39 
40     const SourceLocation ExpansionEnd = [Range, &SM = SM, &MacroName] {
41       // If the range is empty, use the length of the macro.
42       if (Range.getBegin() == Range.getEnd())
43         return SM.getExpansionLoc(
44             MacroName.getLocation().getLocWithOffset(MacroName.getLength()));
45 
46       // Include the last character.
47       return SM.getExpansionLoc(Range.getEnd()).getLocWithOffset(1);
48     }();
49 
50     (void)PP;
51     LLVM_DEBUG(llvm::dbgs() << "MacroExpands event: '";
52                dumpTokenInto(PP, llvm::dbgs(), MacroName);
53                llvm::dbgs()
54                << "' with length " << MacroName.getLength() << " at ";
55                MacroNameBegin.print(llvm::dbgs(), SM);
56                llvm::dbgs() << ", expansion end at ";
57                ExpansionEnd.print(llvm::dbgs(), SM); llvm::dbgs() << '\n';);
58 
59     // If the expansion range is empty, use the identifier of the macro as a
60     // range.
61     MacroExpansionContext::ExpansionRangeMap::iterator It;
62     bool Inserted;
63     std::tie(It, Inserted) =
64         ExpansionRanges.try_emplace(MacroNameBegin, ExpansionEnd);
65     if (Inserted) {
66       LLVM_DEBUG(llvm::dbgs() << "maps ";
67                  It->getFirst().print(llvm::dbgs(), SM); llvm::dbgs() << " to ";
68                  It->getSecond().print(llvm::dbgs(), SM);
69                  llvm::dbgs() << '\n';);
70     } else {
71       if (SM.isBeforeInTranslationUnit(It->getSecond(), ExpansionEnd)) {
72         It->getSecond() = ExpansionEnd;
73         LLVM_DEBUG(
74             llvm::dbgs() << "remaps "; It->getFirst().print(llvm::dbgs(), SM);
75             llvm::dbgs() << " to "; It->getSecond().print(llvm::dbgs(), SM);
76             llvm::dbgs() << '\n';);
77       }
78     }
79   }
80 };
81 } // namespace detail
82 } // namespace clang
83 
84 using namespace clang;
85 
86 MacroExpansionContext::MacroExpansionContext(const LangOptions &LangOpts)
87     : LangOpts(LangOpts) {}
88 
89 void MacroExpansionContext::registerForPreprocessor(Preprocessor &NewPP) {
90   PP = &NewPP;
91   SM = &NewPP.getSourceManager();
92 
93   // Make sure that the Preprocessor does not outlive the MacroExpansionContext.
94   PP->addPPCallbacks(std::make_unique<detail::MacroExpansionRangeRecorder>(
95       *PP, *SM, ExpansionRanges));
96   // Same applies here.
97   PP->setTokenWatcher([this](const Token &Tok) { onTokenLexed(Tok); });
98 }
99 
100 std::optional<StringRef>
101 MacroExpansionContext::getExpandedText(SourceLocation MacroExpansionLoc) const {
102   if (MacroExpansionLoc.isMacroID())
103     return std::nullopt;
104 
105   // If there was no macro expansion at that location, return std::nullopt.
106   if (ExpansionRanges.find_as(MacroExpansionLoc) == ExpansionRanges.end())
107     return std::nullopt;
108 
109   // There was macro expansion, but resulted in no tokens, return empty string.
110   const auto It = ExpandedTokens.find_as(MacroExpansionLoc);
111   if (It == ExpandedTokens.end())
112     return StringRef{""};
113 
114   // Otherwise we have the actual token sequence as string.
115   return It->getSecond().str();
116 }
117 
118 std::optional<StringRef>
119 MacroExpansionContext::getOriginalText(SourceLocation MacroExpansionLoc) const {
120   if (MacroExpansionLoc.isMacroID())
121     return std::nullopt;
122 
123   const auto It = ExpansionRanges.find_as(MacroExpansionLoc);
124   if (It == ExpansionRanges.end())
125     return std::nullopt;
126 
127   assert(It->getFirst() != It->getSecond() &&
128          "Every macro expansion must cover a non-empty range.");
129 
130   return Lexer::getSourceText(
131       CharSourceRange::getCharRange(It->getFirst(), It->getSecond()), *SM,
132       LangOpts);
133 }
134 
135 void MacroExpansionContext::dumpExpansionRanges() const {
136   dumpExpansionRangesToStream(llvm::dbgs());
137 }
138 void MacroExpansionContext::dumpExpandedTexts() const {
139   dumpExpandedTextsToStream(llvm::dbgs());
140 }
141 
142 void MacroExpansionContext::dumpExpansionRangesToStream(raw_ostream &OS) const {
143   std::vector<std::pair<SourceLocation, SourceLocation>> LocalExpansionRanges;
144   LocalExpansionRanges.reserve(ExpansionRanges.size());
145   for (const auto &Record : ExpansionRanges)
146     LocalExpansionRanges.emplace_back(
147         std::make_pair(Record.getFirst(), Record.getSecond()));
148   llvm::sort(LocalExpansionRanges);
149 
150   OS << "\n=============== ExpansionRanges ===============\n";
151   for (const auto &Record : LocalExpansionRanges) {
152     OS << "> ";
153     Record.first.print(OS, *SM);
154     OS << ", ";
155     Record.second.print(OS, *SM);
156     OS << '\n';
157   }
158 }
159 
160 void MacroExpansionContext::dumpExpandedTextsToStream(raw_ostream &OS) const {
161   std::vector<std::pair<SourceLocation, MacroExpansionText>>
162       LocalExpandedTokens;
163   LocalExpandedTokens.reserve(ExpandedTokens.size());
164   for (const auto &Record : ExpandedTokens)
165     LocalExpandedTokens.emplace_back(
166         std::make_pair(Record.getFirst(), Record.getSecond()));
167   llvm::sort(LocalExpandedTokens);
168 
169   OS << "\n=============== ExpandedTokens ===============\n";
170   for (const auto &Record : LocalExpandedTokens) {
171     OS << "> ";
172     Record.first.print(OS, *SM);
173     OS << " -> '" << Record.second << "'\n";
174   }
175 }
176 
177 static void dumpTokenInto(const Preprocessor &PP, raw_ostream &OS, Token Tok) {
178   assert(Tok.isNot(tok::raw_identifier));
179 
180   // Ignore annotation tokens like: _Pragma("pack(push, 1)")
181   if (Tok.isAnnotation())
182     return;
183 
184   if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
185     // FIXME: For now, we don't respect whitespaces between macro expanded
186     // tokens. We just emit a space after every identifier to produce a valid
187     // code for `int a ;` like expansions.
188     //              ^-^-- Space after the 'int' and 'a' identifiers.
189     OS << II->getName() << ' ';
190   } else if (Tok.isLiteral() && !Tok.needsCleaning() && Tok.getLiteralData()) {
191     OS << StringRef(Tok.getLiteralData(), Tok.getLength());
192   } else {
193     char Tmp[256];
194     if (Tok.getLength() < sizeof(Tmp)) {
195       const char *TokPtr = Tmp;
196       // FIXME: Might use a different overload for cleaner callsite.
197       unsigned Len = PP.getSpelling(Tok, TokPtr);
198       OS.write(TokPtr, Len);
199     } else {
200       OS << "<too long token>";
201     }
202   }
203 }
204 
205 void MacroExpansionContext::onTokenLexed(const Token &Tok) {
206   SourceLocation SLoc = Tok.getLocation();
207   if (SLoc.isFileID())
208     return;
209 
210   LLVM_DEBUG(llvm::dbgs() << "lexed macro expansion token '";
211              dumpTokenInto(*PP, llvm::dbgs(), Tok); llvm::dbgs() << "' at ";
212              SLoc.print(llvm::dbgs(), *SM); llvm::dbgs() << '\n';);
213 
214   // Remove spelling location.
215   SourceLocation CurrExpansionLoc = SM->getExpansionLoc(SLoc);
216 
217   MacroExpansionText TokenAsString;
218   llvm::raw_svector_ostream OS(TokenAsString);
219 
220   // FIXME: Prepend newlines and space to produce the exact same output as the
221   // preprocessor would for this token.
222 
223   dumpTokenInto(*PP, OS, Tok);
224 
225   ExpansionMap::iterator It;
226   bool Inserted;
227   std::tie(It, Inserted) =
228       ExpandedTokens.try_emplace(CurrExpansionLoc, std::move(TokenAsString));
229   if (!Inserted)
230     It->getSecond().append(TokenAsString);
231 }
232 
233