1 //===- MacroExpansionContext.cpp - Macro expansion information --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Analysis/MacroExpansionContext.h"
10 #include "llvm/Support/Debug.h"
11 
12 #define DEBUG_TYPE "macro-expansion-context"
13 
// Forward declaration of the file-local token printer; it is defined further
// down in this file. It is declared here, with fully qualified parameter
// types, so that the callback class below can use it in its debug output.
static void dumpTokenInto(const clang::Preprocessor &PP, clang::raw_ostream &OS,
                          clang::Token Tok);
16 
17 namespace clang {
18 namespace detail {
19 class MacroExpansionRangeRecorder : public PPCallbacks {
20   const Preprocessor &PP;
21   SourceManager &SM;
22   MacroExpansionContext::ExpansionRangeMap &ExpansionRanges;
23 
24 public:
25   explicit MacroExpansionRangeRecorder(
26       const Preprocessor &PP, SourceManager &SM,
27       MacroExpansionContext::ExpansionRangeMap &ExpansionRanges)
28       : PP(PP), SM(SM), ExpansionRanges(ExpansionRanges) {}
29 
30   void MacroExpands(const Token &MacroName, const MacroDefinition &MD,
31                     SourceRange Range, const MacroArgs *Args) override {
32     // Ignore annotation tokens like: _Pragma("pack(push, 1)")
33     if (MacroName.getIdentifierInfo()->getName() == "_Pragma")
34       return;
35 
36     SourceLocation MacroNameBegin = SM.getExpansionLoc(MacroName.getLocation());
37     assert(MacroNameBegin == SM.getExpansionLoc(Range.getBegin()));
38 
39     const SourceLocation ExpansionEnd = [Range, &SM = SM, &MacroName] {
40       // If the range is empty, use the length of the macro.
41       if (Range.getBegin() == Range.getEnd())
42         return SM.getExpansionLoc(
43             MacroName.getLocation().getLocWithOffset(MacroName.getLength()));
44 
45       // Include the last character.
46       return SM.getExpansionLoc(Range.getEnd()).getLocWithOffset(1);
47     }();
48 
49     (void)PP;
50     LLVM_DEBUG(llvm::dbgs() << "MacroExpands event: '";
51                dumpTokenInto(PP, llvm::dbgs(), MacroName);
52                llvm::dbgs()
53                << "' with length " << MacroName.getLength() << " at ";
54                MacroNameBegin.print(llvm::dbgs(), SM);
55                llvm::dbgs() << ", expansion end at ";
56                ExpansionEnd.print(llvm::dbgs(), SM); llvm::dbgs() << '\n';);
57 
58     // If the expansion range is empty, use the identifier of the macro as a
59     // range.
60     MacroExpansionContext::ExpansionRangeMap::iterator It;
61     bool Inserted;
62     std::tie(It, Inserted) =
63         ExpansionRanges.try_emplace(MacroNameBegin, ExpansionEnd);
64     if (Inserted) {
65       LLVM_DEBUG(llvm::dbgs() << "maps ";
66                  It->getFirst().print(llvm::dbgs(), SM); llvm::dbgs() << " to ";
67                  It->getSecond().print(llvm::dbgs(), SM);
68                  llvm::dbgs() << '\n';);
69     } else {
70       if (SM.isBeforeInTranslationUnit(It->getSecond(), ExpansionEnd)) {
71         It->getSecond() = ExpansionEnd;
72         LLVM_DEBUG(
73             llvm::dbgs() << "remaps "; It->getFirst().print(llvm::dbgs(), SM);
74             llvm::dbgs() << " to "; It->getSecond().print(llvm::dbgs(), SM);
75             llvm::dbgs() << '\n';);
76       }
77     }
78   }
79 };
80 } // namespace detail
81 } // namespace clang
82 
83 using namespace clang;
84 
/// Only stores the language options. The preprocessor and source manager
/// pointers are assigned later, by registerForPreprocessor().
MacroExpansionContext::MacroExpansionContext(const LangOptions &LangOpts)
    : LangOpts(LangOpts) {}
87 
88 void MacroExpansionContext::registerForPreprocessor(Preprocessor &NewPP) {
89   PP = &NewPP;
90   SM = &NewPP.getSourceManager();
91 
92   // Make sure that the Preprocessor does not outlive the MacroExpansionContext.
93   PP->addPPCallbacks(std::make_unique<detail::MacroExpansionRangeRecorder>(
94       *PP, *SM, ExpansionRanges));
95   // Same applies here.
96   PP->setTokenWatcher([this](const Token &Tok) { onTokenLexed(Tok); });
97 }
98 
99 Optional<StringRef>
100 MacroExpansionContext::getExpandedText(SourceLocation MacroExpansionLoc) const {
101   if (MacroExpansionLoc.isMacroID())
102     return llvm::None;
103 
104   // If there was no macro expansion at that location, return None.
105   if (ExpansionRanges.find_as(MacroExpansionLoc) == ExpansionRanges.end())
106     return llvm::None;
107 
108   // There was macro expansion, but resulted in no tokens, return empty string.
109   const auto It = ExpandedTokens.find_as(MacroExpansionLoc);
110   if (It == ExpandedTokens.end())
111     return StringRef{""};
112 
113   // Otherwise we have the actual token sequence as string.
114   return It->getSecond().str();
115 }
116 
117 Optional<StringRef>
118 MacroExpansionContext::getOriginalText(SourceLocation MacroExpansionLoc) const {
119   if (MacroExpansionLoc.isMacroID())
120     return llvm::None;
121 
122   const auto It = ExpansionRanges.find_as(MacroExpansionLoc);
123   if (It == ExpansionRanges.end())
124     return llvm::None;
125 
126   assert(It->getFirst() != It->getSecond() &&
127          "Every macro expansion must cover a non-empty range.");
128 
129   return Lexer::getSourceText(
130       CharSourceRange::getCharRange(It->getFirst(), It->getSecond()), *SM,
131       LangOpts);
132 }
133 
/// Convenience wrapper: prints the recorded expansion ranges to llvm::dbgs().
void MacroExpansionContext::dumpExpansionRanges() const {
  dumpExpansionRangesToStream(llvm::dbgs());
}
/// Convenience wrapper: prints the recorded expanded texts to llvm::dbgs().
void MacroExpansionContext::dumpExpandedTexts() const {
  dumpExpandedTextsToStream(llvm::dbgs());
}
140 
141 void MacroExpansionContext::dumpExpansionRangesToStream(raw_ostream &OS) const {
142   std::vector<std::pair<SourceLocation, SourceLocation>> LocalExpansionRanges;
143   LocalExpansionRanges.reserve(ExpansionRanges.size());
144   for (const auto &Record : ExpansionRanges)
145     LocalExpansionRanges.emplace_back(
146         std::make_pair(Record.getFirst(), Record.getSecond()));
147   llvm::sort(LocalExpansionRanges);
148 
149   OS << "\n=============== ExpansionRanges ===============\n";
150   for (const auto &Record : LocalExpansionRanges) {
151     OS << "> ";
152     Record.first.print(OS, *SM);
153     OS << ", ";
154     Record.second.print(OS, *SM);
155     OS << '\n';
156   }
157 }
158 
159 void MacroExpansionContext::dumpExpandedTextsToStream(raw_ostream &OS) const {
160   std::vector<std::pair<SourceLocation, MacroExpansionText>>
161       LocalExpandedTokens;
162   LocalExpandedTokens.reserve(ExpandedTokens.size());
163   for (const auto &Record : ExpandedTokens)
164     LocalExpandedTokens.emplace_back(
165         std::make_pair(Record.getFirst(), Record.getSecond()));
166   llvm::sort(LocalExpandedTokens);
167 
168   OS << "\n=============== ExpandedTokens ===============\n";
169   for (const auto &Record : LocalExpandedTokens) {
170     OS << "> ";
171     Record.first.print(OS, *SM);
172     OS << " -> '" << Record.second << "'\n";
173   }
174 }
175 
176 static void dumpTokenInto(const Preprocessor &PP, raw_ostream &OS, Token Tok) {
177   assert(Tok.isNot(tok::raw_identifier));
178 
179   // Ignore annotation tokens like: _Pragma("pack(push, 1)")
180   if (Tok.isAnnotation())
181     return;
182 
183   if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
184     // FIXME: For now, we don't respect whitespaces between macro expanded
185     // tokens. We just emit a space after every identifier to produce a valid
186     // code for `int a ;` like expansions.
187     //              ^-^-- Space after the 'int' and 'a' identifiers.
188     OS << II->getName() << ' ';
189   } else if (Tok.isLiteral() && !Tok.needsCleaning() && Tok.getLiteralData()) {
190     OS << StringRef(Tok.getLiteralData(), Tok.getLength());
191   } else {
192     char Tmp[256];
193     if (Tok.getLength() < sizeof(Tmp)) {
194       const char *TokPtr = Tmp;
195       // FIXME: Might use a different overload for cleaner callsite.
196       unsigned Len = PP.getSpelling(Tok, TokPtr);
197       OS.write(TokPtr, Len);
198     } else {
199       OS << "<too long token>";
200     }
201   }
202 }
203 
204 void MacroExpansionContext::onTokenLexed(const Token &Tok) {
205   SourceLocation SLoc = Tok.getLocation();
206   if (SLoc.isFileID())
207     return;
208 
209   LLVM_DEBUG(llvm::dbgs() << "lexed macro expansion token '";
210              dumpTokenInto(*PP, llvm::dbgs(), Tok); llvm::dbgs() << "' at ";
211              SLoc.print(llvm::dbgs(), *SM); llvm::dbgs() << '\n';);
212 
213   // Remove spelling location.
214   SourceLocation CurrExpansionLoc = SM->getExpansionLoc(SLoc);
215 
216   MacroExpansionText TokenAsString;
217   llvm::raw_svector_ostream OS(TokenAsString);
218 
219   // FIXME: Prepend newlines and space to produce the exact same output as the
220   // preprocessor would for this token.
221 
222   dumpTokenInto(*PP, OS, Tok);
223 
224   ExpansionMap::iterator It;
225   bool Inserted;
226   std::tie(It, Inserted) =
227       ExpandedTokens.try_emplace(CurrExpansionLoc, std::move(TokenAsString));
228   if (!Inserted)
229     It->getSecond().append(TokenAsString);
230 }
231 
232