1 //===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of MacroExpander, which handles macro
11 /// configuration and expansion while formatting.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "Macros.h"
16 
17 #include "Encoding.h"
18 #include "FormatToken.h"
19 #include "FormatTokenLexer.h"
20 #include "clang/Basic/TokenKinds.h"
21 #include "clang/Format/Format.h"
22 #include "clang/Lex/HeaderSearch.h"
23 #include "clang/Lex/HeaderSearchOptions.h"
24 #include "clang/Lex/Lexer.h"
25 #include "clang/Lex/ModuleLoader.h"
26 #include "clang/Lex/Preprocessor.h"
27 #include "clang/Lex/PreprocessorOptions.h"
28 #include "llvm/ADT/StringSet.h"
29 #include "llvm/Support/ErrorHandling.h"
30 
31 namespace clang {
32 namespace format {
33 
34 struct MacroExpander::Definition {
35   StringRef Name;
36   SmallVector<FormatToken *, 8> Params;
37   SmallVector<FormatToken *, 8> Body;
38 
39   // Map from each argument's name to its position in the argument list.
40   // With "M(x, y) x + y":
41   //   x -> 0
42   //   y -> 1
43   llvm::StringMap<size_t> ArgMap;
44 
45   bool ObjectLike = true;
46 };
47 
48 class MacroExpander::DefinitionParser {
49 public:
50   DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) {
51     assert(!Tokens.empty());
52     Current = Tokens[0];
53   }
54 
55   // Parse the token stream and return the corresponding Definition object.
56   // Returns an empty definition object with a null-Name on error.
57   MacroExpander::Definition parse() {
58     if (!Current->is(tok::identifier))
59       return {};
60     Def.Name = Current->TokenText;
61     nextToken();
62     if (Current->is(tok::l_paren)) {
63       Def.ObjectLike = false;
64       if (!parseParams())
65         return {};
66     }
67     if (!parseExpansion())
68       return {};
69 
70     return Def;
71   }
72 
73 private:
74   bool parseParams() {
75     assert(Current->is(tok::l_paren));
76     nextToken();
77     while (Current->is(tok::identifier)) {
78       Def.Params.push_back(Current);
79       Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1;
80       nextToken();
81       if (Current->isNot(tok::comma))
82         break;
83       nextToken();
84     }
85     if (Current->isNot(tok::r_paren))
86       return false;
87     nextToken();
88     return true;
89   }
90 
91   bool parseExpansion() {
92     if (!Current->isOneOf(tok::equal, tok::eof))
93       return false;
94     if (Current->is(tok::equal))
95       nextToken();
96     parseTail();
97     return true;
98   }
99 
100   void parseTail() {
101     while (Current->isNot(tok::eof)) {
102       Def.Body.push_back(Current);
103       nextToken();
104     }
105     Def.Body.push_back(Current);
106   }
107 
108   void nextToken() {
109     if (Pos + 1 < Tokens.size())
110       ++Pos;
111     Current = Tokens[Pos];
112     Current->Finalized = true;
113   }
114 
115   size_t Pos = 0;
116   FormatToken *Current = nullptr;
117   Definition Def;
118   ArrayRef<FormatToken *> Tokens;
119 };
120 
121 MacroExpander::MacroExpander(
122     const std::vector<std::string> &Macros, clang::SourceManager &SourceMgr,
123     const FormatStyle &Style,
124     llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
125     IdentifierTable &IdentTable)
126     : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator),
127       IdentTable(IdentTable) {
128   for (const std::string &Macro : Macros) {
129     parseDefinition(Macro);
130   }
131 }
132 
133 MacroExpander::~MacroExpander() = default;
134 
135 void MacroExpander::parseDefinition(const std::string &Macro) {
136   Buffers.push_back(
137       llvm::MemoryBuffer::getMemBufferCopy(Macro, "<scratch space>"));
138   clang::FileID FID = SourceMgr.createFileID(Buffers.back()->getMemBufferRef());
139   FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8,
140                        Allocator, IdentTable);
141   const auto Tokens = Lex.lex();
142   if (!Tokens.empty()) {
143     DefinitionParser Parser(Tokens);
144     auto Definition = Parser.parse();
145     Definitions[Definition.Name] = std::move(Definition);
146   }
147 }
148 
149 bool MacroExpander::defined(llvm::StringRef Name) const {
150   return Definitions.find(Name) != Definitions.end();
151 }
152 
153 bool MacroExpander::objectLike(llvm::StringRef Name) const {
154   return Definitions.find(Name)->second.ObjectLike;
155 }
156 
157 llvm::SmallVector<FormatToken *, 8> MacroExpander::expand(FormatToken *ID,
158                                                           ArgsList Args) const {
159   assert(defined(ID->TokenText));
160   SmallVector<FormatToken *, 8> Result;
161   const Definition &Def = Definitions.find(ID->TokenText)->second;
162 
163   // Expand each argument at most once.
164   llvm::StringSet<> ExpandedArgs;
165 
166   // Adds the given token to Result.
167   auto pushToken = [&](FormatToken *Tok) {
168     Tok->MacroCtx->ExpandedFrom.push_back(ID);
169     Result.push_back(Tok);
170   };
171 
172   // If Tok references a parameter, adds the corresponding argument to Result.
173   // Returns false if Tok does not reference a parameter.
174   auto expandArgument = [&](FormatToken *Tok) -> bool {
175     // If the current token references a parameter, expand the corresponding
176     // argument.
177     if (!Tok->is(tok::identifier) || ExpandedArgs.contains(Tok->TokenText))
178       return false;
179     ExpandedArgs.insert(Tok->TokenText);
180     auto I = Def.ArgMap.find(Tok->TokenText);
181     if (I == Def.ArgMap.end())
182       return false;
183     // If there are fewer arguments than referenced parameters, treat the
184     // parameter as empty.
185     // FIXME: Potentially fully abort the expansion instead.
186     if (I->getValue() >= Args.size())
187       return true;
188     for (FormatToken *Arg : Args[I->getValue()]) {
189       // A token can be part of a macro argument at multiple levels.
190       // For example, with "ID(x) x":
191       // in ID(ID(x)), 'x' is expanded first as argument to the inner
192       // ID, then again as argument to the outer ID. We keep the macro
193       // role the token had from the inner expansion.
194       if (!Arg->MacroCtx)
195         Arg->MacroCtx = MacroExpansion(MR_ExpandedArg);
196       pushToken(Arg);
197     }
198     return true;
199   };
200 
201   // Expand the definition into Result.
202   for (FormatToken *Tok : Def.Body) {
203     if (expandArgument(Tok))
204       continue;
205     // Create a copy of the tokens from the macro body, i.e. were not provided
206     // by user code.
207     FormatToken *New = new (Allocator.Allocate()) FormatToken;
208     New->copyFrom(*Tok);
209     assert(!New->MacroCtx);
210     // Tokens that are not part of the user code are not formatted.
211     New->MacroCtx = MacroExpansion(MR_Hidden);
212     pushToken(New);
213   }
214   assert(Result.size() >= 1 && Result.back()->is(tok::eof));
215   if (Result.size() > 1) {
216     ++Result[0]->MacroCtx->StartOfExpansion;
217     ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion;
218   }
219   return Result;
220 }
221 
222 } // namespace format
223 } // namespace clang
224