1*13fbcb42Sjoerg //===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===//
2*13fbcb42Sjoerg //
3*13fbcb42Sjoerg //                     The LLVM Compiler Infrastructure
4*13fbcb42Sjoerg //
5*13fbcb42Sjoerg // This file is distributed under the University of Illinois Open Source
6*13fbcb42Sjoerg // License. See LICENSE.TXT for details.
7*13fbcb42Sjoerg //
8*13fbcb42Sjoerg //===----------------------------------------------------------------------===//
9*13fbcb42Sjoerg ///
10*13fbcb42Sjoerg /// \file
11*13fbcb42Sjoerg /// This file contains the implementation of MacroExpander, which handles macro
12*13fbcb42Sjoerg /// configuration and expansion while formatting.
13*13fbcb42Sjoerg ///
14*13fbcb42Sjoerg //===----------------------------------------------------------------------===//
15*13fbcb42Sjoerg 
16*13fbcb42Sjoerg #include "Macros.h"
17*13fbcb42Sjoerg 
18*13fbcb42Sjoerg #include "Encoding.h"
19*13fbcb42Sjoerg #include "FormatToken.h"
20*13fbcb42Sjoerg #include "FormatTokenLexer.h"
21*13fbcb42Sjoerg #include "clang/Basic/TokenKinds.h"
22*13fbcb42Sjoerg #include "clang/Format/Format.h"
23*13fbcb42Sjoerg #include "clang/Lex/HeaderSearch.h"
24*13fbcb42Sjoerg #include "clang/Lex/HeaderSearchOptions.h"
25*13fbcb42Sjoerg #include "clang/Lex/Lexer.h"
26*13fbcb42Sjoerg #include "clang/Lex/ModuleLoader.h"
27*13fbcb42Sjoerg #include "clang/Lex/Preprocessor.h"
28*13fbcb42Sjoerg #include "clang/Lex/PreprocessorOptions.h"
29*13fbcb42Sjoerg #include "llvm/ADT/StringSet.h"
30*13fbcb42Sjoerg #include "llvm/Support/ErrorHandling.h"
31*13fbcb42Sjoerg 
32*13fbcb42Sjoerg namespace clang {
33*13fbcb42Sjoerg namespace format {
34*13fbcb42Sjoerg 
35*13fbcb42Sjoerg struct MacroExpander::Definition {
36*13fbcb42Sjoerg   StringRef Name;
37*13fbcb42Sjoerg   SmallVector<FormatToken *, 8> Params;
38*13fbcb42Sjoerg   SmallVector<FormatToken *, 8> Body;
39*13fbcb42Sjoerg 
40*13fbcb42Sjoerg   // Map from each argument's name to its position in the argument list.
41*13fbcb42Sjoerg   // With "M(x, y) x + y":
42*13fbcb42Sjoerg   //   x -> 0
43*13fbcb42Sjoerg   //   y -> 1
44*13fbcb42Sjoerg   llvm::StringMap<size_t> ArgMap;
45*13fbcb42Sjoerg 
46*13fbcb42Sjoerg   bool ObjectLike = true;
47*13fbcb42Sjoerg };
48*13fbcb42Sjoerg 
49*13fbcb42Sjoerg class MacroExpander::DefinitionParser {
50*13fbcb42Sjoerg public:
DefinitionParser(ArrayRef<FormatToken * > Tokens)51*13fbcb42Sjoerg   DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) {
52*13fbcb42Sjoerg     assert(!Tokens.empty());
53*13fbcb42Sjoerg     Current = Tokens[0];
54*13fbcb42Sjoerg   }
55*13fbcb42Sjoerg 
56*13fbcb42Sjoerg   // Parse the token stream and return the corresonding Definition object.
57*13fbcb42Sjoerg   // Returns an empty definition object with a null-Name on error.
parse()58*13fbcb42Sjoerg   MacroExpander::Definition parse() {
59*13fbcb42Sjoerg     if (!Current->is(tok::identifier))
60*13fbcb42Sjoerg       return {};
61*13fbcb42Sjoerg     Def.Name = Current->TokenText;
62*13fbcb42Sjoerg     nextToken();
63*13fbcb42Sjoerg     if (Current->is(tok::l_paren)) {
64*13fbcb42Sjoerg       Def.ObjectLike = false;
65*13fbcb42Sjoerg       if (!parseParams())
66*13fbcb42Sjoerg         return {};
67*13fbcb42Sjoerg     }
68*13fbcb42Sjoerg     if (!parseExpansion())
69*13fbcb42Sjoerg       return {};
70*13fbcb42Sjoerg 
71*13fbcb42Sjoerg     return Def;
72*13fbcb42Sjoerg   }
73*13fbcb42Sjoerg 
74*13fbcb42Sjoerg private:
parseParams()75*13fbcb42Sjoerg   bool parseParams() {
76*13fbcb42Sjoerg     assert(Current->is(tok::l_paren));
77*13fbcb42Sjoerg     nextToken();
78*13fbcb42Sjoerg     while (Current->is(tok::identifier)) {
79*13fbcb42Sjoerg       Def.Params.push_back(Current);
80*13fbcb42Sjoerg       Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1;
81*13fbcb42Sjoerg       nextToken();
82*13fbcb42Sjoerg       if (Current->isNot(tok::comma))
83*13fbcb42Sjoerg         break;
84*13fbcb42Sjoerg       nextToken();
85*13fbcb42Sjoerg     }
86*13fbcb42Sjoerg     if (Current->isNot(tok::r_paren))
87*13fbcb42Sjoerg       return false;
88*13fbcb42Sjoerg     nextToken();
89*13fbcb42Sjoerg     return true;
90*13fbcb42Sjoerg   }
91*13fbcb42Sjoerg 
parseExpansion()92*13fbcb42Sjoerg   bool parseExpansion() {
93*13fbcb42Sjoerg     if (!Current->isOneOf(tok::equal, tok::eof))
94*13fbcb42Sjoerg       return false;
95*13fbcb42Sjoerg     if (Current->is(tok::equal))
96*13fbcb42Sjoerg       nextToken();
97*13fbcb42Sjoerg     parseTail();
98*13fbcb42Sjoerg     return true;
99*13fbcb42Sjoerg   }
100*13fbcb42Sjoerg 
parseTail()101*13fbcb42Sjoerg   void parseTail() {
102*13fbcb42Sjoerg     while (Current->isNot(tok::eof)) {
103*13fbcb42Sjoerg       Def.Body.push_back(Current);
104*13fbcb42Sjoerg       nextToken();
105*13fbcb42Sjoerg     }
106*13fbcb42Sjoerg     Def.Body.push_back(Current);
107*13fbcb42Sjoerg   }
108*13fbcb42Sjoerg 
nextToken()109*13fbcb42Sjoerg   void nextToken() {
110*13fbcb42Sjoerg     if (Pos + 1 < Tokens.size())
111*13fbcb42Sjoerg       ++Pos;
112*13fbcb42Sjoerg     Current = Tokens[Pos];
113*13fbcb42Sjoerg     Current->Finalized = true;
114*13fbcb42Sjoerg   }
115*13fbcb42Sjoerg 
116*13fbcb42Sjoerg   size_t Pos = 0;
117*13fbcb42Sjoerg   FormatToken *Current = nullptr;
118*13fbcb42Sjoerg   Definition Def;
119*13fbcb42Sjoerg   ArrayRef<FormatToken *> Tokens;
120*13fbcb42Sjoerg };
121*13fbcb42Sjoerg 
MacroExpander(const std::vector<std::string> & Macros,clang::SourceManager & SourceMgr,const FormatStyle & Style,llvm::SpecificBumpPtrAllocator<FormatToken> & Allocator,IdentifierTable & IdentTable)122*13fbcb42Sjoerg MacroExpander::MacroExpander(
123*13fbcb42Sjoerg     const std::vector<std::string> &Macros, clang::SourceManager &SourceMgr,
124*13fbcb42Sjoerg     const FormatStyle &Style,
125*13fbcb42Sjoerg     llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
126*13fbcb42Sjoerg     IdentifierTable &IdentTable)
127*13fbcb42Sjoerg     : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator),
128*13fbcb42Sjoerg       IdentTable(IdentTable) {
129*13fbcb42Sjoerg   for (const std::string &Macro : Macros) {
130*13fbcb42Sjoerg     parseDefinition(Macro);
131*13fbcb42Sjoerg   }
132*13fbcb42Sjoerg }
133*13fbcb42Sjoerg 
134*13fbcb42Sjoerg MacroExpander::~MacroExpander() = default;
135*13fbcb42Sjoerg 
parseDefinition(const std::string & Macro)136*13fbcb42Sjoerg void MacroExpander::parseDefinition(const std::string &Macro) {
137*13fbcb42Sjoerg   Buffers.push_back(
138*13fbcb42Sjoerg       llvm::MemoryBuffer::getMemBufferCopy(Macro, "<scratch space>"));
139*13fbcb42Sjoerg   clang::FileID FID = SourceMgr.createFileID(Buffers.back()->getMemBufferRef());
140*13fbcb42Sjoerg   FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8,
141*13fbcb42Sjoerg                        Allocator, IdentTable);
142*13fbcb42Sjoerg   const auto Tokens = Lex.lex();
143*13fbcb42Sjoerg   if (!Tokens.empty()) {
144*13fbcb42Sjoerg     DefinitionParser Parser(Tokens);
145*13fbcb42Sjoerg     auto Definition = Parser.parse();
146*13fbcb42Sjoerg     Definitions[Definition.Name] = std::move(Definition);
147*13fbcb42Sjoerg   }
148*13fbcb42Sjoerg }
149*13fbcb42Sjoerg 
defined(llvm::StringRef Name) const150*13fbcb42Sjoerg bool MacroExpander::defined(llvm::StringRef Name) const {
151*13fbcb42Sjoerg   return Definitions.find(Name) != Definitions.end();
152*13fbcb42Sjoerg }
153*13fbcb42Sjoerg 
objectLike(llvm::StringRef Name) const154*13fbcb42Sjoerg bool MacroExpander::objectLike(llvm::StringRef Name) const {
155*13fbcb42Sjoerg   return Definitions.find(Name)->second.ObjectLike;
156*13fbcb42Sjoerg }
157*13fbcb42Sjoerg 
expand(FormatToken * ID,ArgsList Args) const158*13fbcb42Sjoerg llvm::SmallVector<FormatToken *, 8> MacroExpander::expand(FormatToken *ID,
159*13fbcb42Sjoerg                                                           ArgsList Args) const {
160*13fbcb42Sjoerg   assert(defined(ID->TokenText));
161*13fbcb42Sjoerg   SmallVector<FormatToken *, 8> Result;
162*13fbcb42Sjoerg   const Definition &Def = Definitions.find(ID->TokenText)->second;
163*13fbcb42Sjoerg 
164*13fbcb42Sjoerg   // Expand each argument at most once.
165*13fbcb42Sjoerg   llvm::StringSet<> ExpandedArgs;
166*13fbcb42Sjoerg 
167*13fbcb42Sjoerg   // Adds the given token to Result.
168*13fbcb42Sjoerg   auto pushToken = [&](FormatToken *Tok) {
169*13fbcb42Sjoerg     Tok->MacroCtx->ExpandedFrom.push_back(ID);
170*13fbcb42Sjoerg     Result.push_back(Tok);
171*13fbcb42Sjoerg   };
172*13fbcb42Sjoerg 
173*13fbcb42Sjoerg   // If Tok references a parameter, adds the corresponding argument to Result.
174*13fbcb42Sjoerg   // Returns false if Tok does not reference a parameter.
175*13fbcb42Sjoerg   auto expandArgument = [&](FormatToken *Tok) -> bool {
176*13fbcb42Sjoerg     // If the current token references a parameter, expand the corresponding
177*13fbcb42Sjoerg     // argument.
178*13fbcb42Sjoerg     if (!Tok->is(tok::identifier) || ExpandedArgs.contains(Tok->TokenText))
179*13fbcb42Sjoerg       return false;
180*13fbcb42Sjoerg     ExpandedArgs.insert(Tok->TokenText);
181*13fbcb42Sjoerg     auto I = Def.ArgMap.find(Tok->TokenText);
182*13fbcb42Sjoerg     if (I == Def.ArgMap.end())
183*13fbcb42Sjoerg       return false;
184*13fbcb42Sjoerg     // If there are fewer arguments than referenced parameters, treat the
185*13fbcb42Sjoerg     // parameter as empty.
186*13fbcb42Sjoerg     // FIXME: Potentially fully abort the expansion instead.
187*13fbcb42Sjoerg     if (I->getValue() >= Args.size())
188*13fbcb42Sjoerg       return true;
189*13fbcb42Sjoerg     for (FormatToken *Arg : Args[I->getValue()]) {
190*13fbcb42Sjoerg       // A token can be part of a macro argument at multiple levels.
191*13fbcb42Sjoerg       // For example, with "ID(x) x":
192*13fbcb42Sjoerg       // in ID(ID(x)), 'x' is expanded first as argument to the inner
193*13fbcb42Sjoerg       // ID, then again as argument to the outer ID. We keep the macro
194*13fbcb42Sjoerg       // role the token had from the inner expansion.
195*13fbcb42Sjoerg       if (!Arg->MacroCtx)
196*13fbcb42Sjoerg         Arg->MacroCtx = MacroExpansion(MR_ExpandedArg);
197*13fbcb42Sjoerg       pushToken(Arg);
198*13fbcb42Sjoerg     }
199*13fbcb42Sjoerg     return true;
200*13fbcb42Sjoerg   };
201*13fbcb42Sjoerg 
202*13fbcb42Sjoerg   // Expand the definition into Result.
203*13fbcb42Sjoerg   for (FormatToken *Tok : Def.Body) {
204*13fbcb42Sjoerg     if (expandArgument(Tok))
205*13fbcb42Sjoerg       continue;
206*13fbcb42Sjoerg     // Create a copy of the tokens from the macro body, i.e. were not provided
207*13fbcb42Sjoerg     // by user code.
208*13fbcb42Sjoerg     FormatToken *New = new (Allocator.Allocate()) FormatToken;
209*13fbcb42Sjoerg     New->copyFrom(*Tok);
210*13fbcb42Sjoerg     assert(!New->MacroCtx);
211*13fbcb42Sjoerg     // Tokens that are not part of the user code are not formatted.
212*13fbcb42Sjoerg     New->MacroCtx = MacroExpansion(MR_Hidden);
213*13fbcb42Sjoerg     pushToken(New);
214*13fbcb42Sjoerg   }
215*13fbcb42Sjoerg   assert(Result.size() >= 1 && Result.back()->is(tok::eof));
216*13fbcb42Sjoerg   if (Result.size() > 1) {
217*13fbcb42Sjoerg     ++Result[0]->MacroCtx->StartOfExpansion;
218*13fbcb42Sjoerg     ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion;
219*13fbcb42Sjoerg   }
220*13fbcb42Sjoerg   return Result;
221*13fbcb42Sjoerg }
222*13fbcb42Sjoerg 
223*13fbcb42Sjoerg } // namespace format
224*13fbcb42Sjoerg } // namespace clang
225