1 //===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of MacroExpander, which handles macro
11 /// configuration and expansion while formatting.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "Macros.h"
16
17 #include "Encoding.h"
18 #include "FormatToken.h"
19 #include "FormatTokenLexer.h"
20 #include "clang/Basic/TokenKinds.h"
21 #include "clang/Format/Format.h"
22 #include "clang/Lex/HeaderSearch.h"
23 #include "clang/Lex/HeaderSearchOptions.h"
24 #include "clang/Lex/Lexer.h"
25 #include "clang/Lex/ModuleLoader.h"
26 #include "clang/Lex/Preprocessor.h"
27 #include "clang/Lex/PreprocessorOptions.h"
28 #include "llvm/ADT/StringSet.h"
29 #include "llvm/Support/ErrorHandling.h"
30
31 namespace clang {
32 namespace format {
33
34 struct MacroExpander::Definition {
35 StringRef Name;
36 SmallVector<FormatToken *, 8> Params;
37 SmallVector<FormatToken *, 8> Body;
38
39 // Map from each argument's name to its position in the argument list.
40 // With "M(x, y) x + y":
41 // x -> 0
42 // y -> 1
43 llvm::StringMap<size_t> ArgMap;
44
45 bool ObjectLike = true;
46 };
47
48 class MacroExpander::DefinitionParser {
49 public:
DefinitionParser(ArrayRef<FormatToken * > Tokens)50 DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) {
51 assert(!Tokens.empty());
52 Current = Tokens[0];
53 }
54
55 // Parse the token stream and return the corresponding Definition object.
56 // Returns an empty definition object with a null-Name on error.
parse()57 MacroExpander::Definition parse() {
58 if (!Current->is(tok::identifier))
59 return {};
60 Def.Name = Current->TokenText;
61 nextToken();
62 if (Current->is(tok::l_paren)) {
63 Def.ObjectLike = false;
64 if (!parseParams())
65 return {};
66 }
67 if (!parseExpansion())
68 return {};
69
70 return Def;
71 }
72
73 private:
parseParams()74 bool parseParams() {
75 assert(Current->is(tok::l_paren));
76 nextToken();
77 while (Current->is(tok::identifier)) {
78 Def.Params.push_back(Current);
79 Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1;
80 nextToken();
81 if (Current->isNot(tok::comma))
82 break;
83 nextToken();
84 }
85 if (Current->isNot(tok::r_paren))
86 return false;
87 nextToken();
88 return true;
89 }
90
parseExpansion()91 bool parseExpansion() {
92 if (!Current->isOneOf(tok::equal, tok::eof))
93 return false;
94 if (Current->is(tok::equal))
95 nextToken();
96 parseTail();
97 return true;
98 }
99
parseTail()100 void parseTail() {
101 while (Current->isNot(tok::eof)) {
102 Def.Body.push_back(Current);
103 nextToken();
104 }
105 Def.Body.push_back(Current);
106 }
107
nextToken()108 void nextToken() {
109 if (Pos + 1 < Tokens.size())
110 ++Pos;
111 Current = Tokens[Pos];
112 Current->Finalized = true;
113 }
114
115 size_t Pos = 0;
116 FormatToken *Current = nullptr;
117 Definition Def;
118 ArrayRef<FormatToken *> Tokens;
119 };
120
MacroExpander(const std::vector<std::string> & Macros,clang::SourceManager & SourceMgr,const FormatStyle & Style,llvm::SpecificBumpPtrAllocator<FormatToken> & Allocator,IdentifierTable & IdentTable)121 MacroExpander::MacroExpander(
122 const std::vector<std::string> &Macros, clang::SourceManager &SourceMgr,
123 const FormatStyle &Style,
124 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
125 IdentifierTable &IdentTable)
126 : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator),
127 IdentTable(IdentTable) {
128 for (const std::string &Macro : Macros)
129 parseDefinition(Macro);
130 }
131
// Defaulted here rather than in the class definition.
// NOTE(review): presumably required because MacroExpander::Definition is only
// a complete type in this file - confirm against Macros.h.
MacroExpander::~MacroExpander() = default;
133
parseDefinition(const std::string & Macro)134 void MacroExpander::parseDefinition(const std::string &Macro) {
135 Buffers.push_back(
136 llvm::MemoryBuffer::getMemBufferCopy(Macro, "<scratch space>"));
137 clang::FileID FID = SourceMgr.createFileID(Buffers.back()->getMemBufferRef());
138 FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8,
139 Allocator, IdentTable);
140 const auto Tokens = Lex.lex();
141 if (!Tokens.empty()) {
142 DefinitionParser Parser(Tokens);
143 auto Definition = Parser.parse();
144 Definitions[Definition.Name] = std::move(Definition);
145 }
146 }
147
defined(llvm::StringRef Name) const148 bool MacroExpander::defined(llvm::StringRef Name) const {
149 return Definitions.find(Name) != Definitions.end();
150 }
151
objectLike(llvm::StringRef Name) const152 bool MacroExpander::objectLike(llvm::StringRef Name) const {
153 return Definitions.find(Name)->second.ObjectLike;
154 }
155
expand(FormatToken * ID,ArgsList Args) const156 llvm::SmallVector<FormatToken *, 8> MacroExpander::expand(FormatToken *ID,
157 ArgsList Args) const {
158 assert(defined(ID->TokenText));
159 SmallVector<FormatToken *, 8> Result;
160 const Definition &Def = Definitions.find(ID->TokenText)->second;
161
162 // Expand each argument at most once.
163 llvm::StringSet<> ExpandedArgs;
164
165 // Adds the given token to Result.
166 auto pushToken = [&](FormatToken *Tok) {
167 Tok->MacroCtx->ExpandedFrom.push_back(ID);
168 Result.push_back(Tok);
169 };
170
171 // If Tok references a parameter, adds the corresponding argument to Result.
172 // Returns false if Tok does not reference a parameter.
173 auto expandArgument = [&](FormatToken *Tok) -> bool {
174 // If the current token references a parameter, expand the corresponding
175 // argument.
176 if (!Tok->is(tok::identifier) || ExpandedArgs.contains(Tok->TokenText))
177 return false;
178 ExpandedArgs.insert(Tok->TokenText);
179 auto I = Def.ArgMap.find(Tok->TokenText);
180 if (I == Def.ArgMap.end())
181 return false;
182 // If there are fewer arguments than referenced parameters, treat the
183 // parameter as empty.
184 // FIXME: Potentially fully abort the expansion instead.
185 if (I->getValue() >= Args.size())
186 return true;
187 for (FormatToken *Arg : Args[I->getValue()]) {
188 // A token can be part of a macro argument at multiple levels.
189 // For example, with "ID(x) x":
190 // in ID(ID(x)), 'x' is expanded first as argument to the inner
191 // ID, then again as argument to the outer ID. We keep the macro
192 // role the token had from the inner expansion.
193 if (!Arg->MacroCtx)
194 Arg->MacroCtx = MacroExpansion(MR_ExpandedArg);
195 pushToken(Arg);
196 }
197 return true;
198 };
199
200 // Expand the definition into Result.
201 for (FormatToken *Tok : Def.Body) {
202 if (expandArgument(Tok))
203 continue;
204 // Create a copy of the tokens from the macro body, i.e. were not provided
205 // by user code.
206 FormatToken *New = new (Allocator.Allocate()) FormatToken;
207 New->copyFrom(*Tok);
208 assert(!New->MacroCtx);
209 // Tokens that are not part of the user code are not formatted.
210 New->MacroCtx = MacroExpansion(MR_Hidden);
211 pushToken(New);
212 }
213 assert(Result.size() >= 1 && Result.back()->is(tok::eof));
214 if (Result.size() > 1) {
215 ++Result[0]->MacroCtx->StartOfExpansion;
216 ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion;
217 }
218 return Result;
219 }
220
221 } // namespace format
222 } // namespace clang
223