//===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the implementation of MacroExpander, which handles macro
/// configuration and expansion while formatting.
///
//===----------------------------------------------------------------------===//
14a9ac8606Spatrick 
15a9ac8606Spatrick #include "Macros.h"
16a9ac8606Spatrick 
17a9ac8606Spatrick #include "Encoding.h"
18a9ac8606Spatrick #include "FormatToken.h"
19a9ac8606Spatrick #include "FormatTokenLexer.h"
20a9ac8606Spatrick #include "clang/Basic/TokenKinds.h"
21a9ac8606Spatrick #include "clang/Format/Format.h"
22a9ac8606Spatrick #include "clang/Lex/HeaderSearch.h"
23a9ac8606Spatrick #include "clang/Lex/HeaderSearchOptions.h"
24a9ac8606Spatrick #include "clang/Lex/Lexer.h"
25a9ac8606Spatrick #include "clang/Lex/ModuleLoader.h"
26a9ac8606Spatrick #include "clang/Lex/Preprocessor.h"
27a9ac8606Spatrick #include "clang/Lex/PreprocessorOptions.h"
28a9ac8606Spatrick #include "llvm/ADT/StringSet.h"
29a9ac8606Spatrick #include "llvm/Support/ErrorHandling.h"
30a9ac8606Spatrick 
31a9ac8606Spatrick namespace clang {
32a9ac8606Spatrick namespace format {
33a9ac8606Spatrick 
34a9ac8606Spatrick struct MacroExpander::Definition {
35a9ac8606Spatrick   StringRef Name;
36a9ac8606Spatrick   SmallVector<FormatToken *, 8> Params;
37a9ac8606Spatrick   SmallVector<FormatToken *, 8> Body;
38a9ac8606Spatrick 
39a9ac8606Spatrick   // Map from each argument's name to its position in the argument list.
40a9ac8606Spatrick   // With "M(x, y) x + y":
41a9ac8606Spatrick   //   x -> 0
42a9ac8606Spatrick   //   y -> 1
43a9ac8606Spatrick   llvm::StringMap<size_t> ArgMap;
44a9ac8606Spatrick 
45a9ac8606Spatrick   bool ObjectLike = true;
46a9ac8606Spatrick };
47a9ac8606Spatrick 
48a9ac8606Spatrick class MacroExpander::DefinitionParser {
49a9ac8606Spatrick public:
DefinitionParser(ArrayRef<FormatToken * > Tokens)50a9ac8606Spatrick   DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) {
51a9ac8606Spatrick     assert(!Tokens.empty());
52a9ac8606Spatrick     Current = Tokens[0];
53a9ac8606Spatrick   }
54a9ac8606Spatrick 
55*12c85518Srobert   // Parse the token stream and return the corresponding Definition object.
56a9ac8606Spatrick   // Returns an empty definition object with a null-Name on error.
parse()57a9ac8606Spatrick   MacroExpander::Definition parse() {
58a9ac8606Spatrick     if (!Current->is(tok::identifier))
59a9ac8606Spatrick       return {};
60a9ac8606Spatrick     Def.Name = Current->TokenText;
61a9ac8606Spatrick     nextToken();
62a9ac8606Spatrick     if (Current->is(tok::l_paren)) {
63a9ac8606Spatrick       Def.ObjectLike = false;
64a9ac8606Spatrick       if (!parseParams())
65a9ac8606Spatrick         return {};
66a9ac8606Spatrick     }
67a9ac8606Spatrick     if (!parseExpansion())
68a9ac8606Spatrick       return {};
69a9ac8606Spatrick 
70a9ac8606Spatrick     return Def;
71a9ac8606Spatrick   }
72a9ac8606Spatrick 
73a9ac8606Spatrick private:
parseParams()74a9ac8606Spatrick   bool parseParams() {
75a9ac8606Spatrick     assert(Current->is(tok::l_paren));
76a9ac8606Spatrick     nextToken();
77a9ac8606Spatrick     while (Current->is(tok::identifier)) {
78a9ac8606Spatrick       Def.Params.push_back(Current);
79a9ac8606Spatrick       Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1;
80a9ac8606Spatrick       nextToken();
81a9ac8606Spatrick       if (Current->isNot(tok::comma))
82a9ac8606Spatrick         break;
83a9ac8606Spatrick       nextToken();
84a9ac8606Spatrick     }
85a9ac8606Spatrick     if (Current->isNot(tok::r_paren))
86a9ac8606Spatrick       return false;
87a9ac8606Spatrick     nextToken();
88a9ac8606Spatrick     return true;
89a9ac8606Spatrick   }
90a9ac8606Spatrick 
parseExpansion()91a9ac8606Spatrick   bool parseExpansion() {
92a9ac8606Spatrick     if (!Current->isOneOf(tok::equal, tok::eof))
93a9ac8606Spatrick       return false;
94a9ac8606Spatrick     if (Current->is(tok::equal))
95a9ac8606Spatrick       nextToken();
96a9ac8606Spatrick     parseTail();
97a9ac8606Spatrick     return true;
98a9ac8606Spatrick   }
99a9ac8606Spatrick 
parseTail()100a9ac8606Spatrick   void parseTail() {
101a9ac8606Spatrick     while (Current->isNot(tok::eof)) {
102a9ac8606Spatrick       Def.Body.push_back(Current);
103a9ac8606Spatrick       nextToken();
104a9ac8606Spatrick     }
105a9ac8606Spatrick     Def.Body.push_back(Current);
106a9ac8606Spatrick   }
107a9ac8606Spatrick 
nextToken()108a9ac8606Spatrick   void nextToken() {
109a9ac8606Spatrick     if (Pos + 1 < Tokens.size())
110a9ac8606Spatrick       ++Pos;
111a9ac8606Spatrick     Current = Tokens[Pos];
112a9ac8606Spatrick     Current->Finalized = true;
113a9ac8606Spatrick   }
114a9ac8606Spatrick 
115a9ac8606Spatrick   size_t Pos = 0;
116a9ac8606Spatrick   FormatToken *Current = nullptr;
117a9ac8606Spatrick   Definition Def;
118a9ac8606Spatrick   ArrayRef<FormatToken *> Tokens;
119a9ac8606Spatrick };
120a9ac8606Spatrick 
MacroExpander(const std::vector<std::string> & Macros,clang::SourceManager & SourceMgr,const FormatStyle & Style,llvm::SpecificBumpPtrAllocator<FormatToken> & Allocator,IdentifierTable & IdentTable)121a9ac8606Spatrick MacroExpander::MacroExpander(
122a9ac8606Spatrick     const std::vector<std::string> &Macros, clang::SourceManager &SourceMgr,
123a9ac8606Spatrick     const FormatStyle &Style,
124a9ac8606Spatrick     llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
125a9ac8606Spatrick     IdentifierTable &IdentTable)
126a9ac8606Spatrick     : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator),
127a9ac8606Spatrick       IdentTable(IdentTable) {
128*12c85518Srobert   for (const std::string &Macro : Macros)
129a9ac8606Spatrick     parseDefinition(Macro);
130a9ac8606Spatrick }
131a9ac8606Spatrick 
132a9ac8606Spatrick MacroExpander::~MacroExpander() = default;
133a9ac8606Spatrick 
parseDefinition(const std::string & Macro)134a9ac8606Spatrick void MacroExpander::parseDefinition(const std::string &Macro) {
135a9ac8606Spatrick   Buffers.push_back(
136a9ac8606Spatrick       llvm::MemoryBuffer::getMemBufferCopy(Macro, "<scratch space>"));
137a9ac8606Spatrick   clang::FileID FID = SourceMgr.createFileID(Buffers.back()->getMemBufferRef());
138a9ac8606Spatrick   FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8,
139a9ac8606Spatrick                        Allocator, IdentTable);
140a9ac8606Spatrick   const auto Tokens = Lex.lex();
141a9ac8606Spatrick   if (!Tokens.empty()) {
142a9ac8606Spatrick     DefinitionParser Parser(Tokens);
143a9ac8606Spatrick     auto Definition = Parser.parse();
144a9ac8606Spatrick     Definitions[Definition.Name] = std::move(Definition);
145a9ac8606Spatrick   }
146a9ac8606Spatrick }
147a9ac8606Spatrick 
defined(llvm::StringRef Name) const148a9ac8606Spatrick bool MacroExpander::defined(llvm::StringRef Name) const {
149a9ac8606Spatrick   return Definitions.find(Name) != Definitions.end();
150a9ac8606Spatrick }
151a9ac8606Spatrick 
objectLike(llvm::StringRef Name) const152a9ac8606Spatrick bool MacroExpander::objectLike(llvm::StringRef Name) const {
153a9ac8606Spatrick   return Definitions.find(Name)->second.ObjectLike;
154a9ac8606Spatrick }
155a9ac8606Spatrick 
expand(FormatToken * ID,ArgsList Args) const156a9ac8606Spatrick llvm::SmallVector<FormatToken *, 8> MacroExpander::expand(FormatToken *ID,
157a9ac8606Spatrick                                                           ArgsList Args) const {
158a9ac8606Spatrick   assert(defined(ID->TokenText));
159a9ac8606Spatrick   SmallVector<FormatToken *, 8> Result;
160a9ac8606Spatrick   const Definition &Def = Definitions.find(ID->TokenText)->second;
161a9ac8606Spatrick 
162a9ac8606Spatrick   // Expand each argument at most once.
163a9ac8606Spatrick   llvm::StringSet<> ExpandedArgs;
164a9ac8606Spatrick 
165a9ac8606Spatrick   // Adds the given token to Result.
166a9ac8606Spatrick   auto pushToken = [&](FormatToken *Tok) {
167a9ac8606Spatrick     Tok->MacroCtx->ExpandedFrom.push_back(ID);
168a9ac8606Spatrick     Result.push_back(Tok);
169a9ac8606Spatrick   };
170a9ac8606Spatrick 
171a9ac8606Spatrick   // If Tok references a parameter, adds the corresponding argument to Result.
172a9ac8606Spatrick   // Returns false if Tok does not reference a parameter.
173a9ac8606Spatrick   auto expandArgument = [&](FormatToken *Tok) -> bool {
174a9ac8606Spatrick     // If the current token references a parameter, expand the corresponding
175a9ac8606Spatrick     // argument.
176a9ac8606Spatrick     if (!Tok->is(tok::identifier) || ExpandedArgs.contains(Tok->TokenText))
177a9ac8606Spatrick       return false;
178a9ac8606Spatrick     ExpandedArgs.insert(Tok->TokenText);
179a9ac8606Spatrick     auto I = Def.ArgMap.find(Tok->TokenText);
180a9ac8606Spatrick     if (I == Def.ArgMap.end())
181a9ac8606Spatrick       return false;
182a9ac8606Spatrick     // If there are fewer arguments than referenced parameters, treat the
183a9ac8606Spatrick     // parameter as empty.
184a9ac8606Spatrick     // FIXME: Potentially fully abort the expansion instead.
185a9ac8606Spatrick     if (I->getValue() >= Args.size())
186a9ac8606Spatrick       return true;
187a9ac8606Spatrick     for (FormatToken *Arg : Args[I->getValue()]) {
188a9ac8606Spatrick       // A token can be part of a macro argument at multiple levels.
189a9ac8606Spatrick       // For example, with "ID(x) x":
190a9ac8606Spatrick       // in ID(ID(x)), 'x' is expanded first as argument to the inner
191a9ac8606Spatrick       // ID, then again as argument to the outer ID. We keep the macro
192a9ac8606Spatrick       // role the token had from the inner expansion.
193a9ac8606Spatrick       if (!Arg->MacroCtx)
194a9ac8606Spatrick         Arg->MacroCtx = MacroExpansion(MR_ExpandedArg);
195a9ac8606Spatrick       pushToken(Arg);
196a9ac8606Spatrick     }
197a9ac8606Spatrick     return true;
198a9ac8606Spatrick   };
199a9ac8606Spatrick 
200a9ac8606Spatrick   // Expand the definition into Result.
201a9ac8606Spatrick   for (FormatToken *Tok : Def.Body) {
202a9ac8606Spatrick     if (expandArgument(Tok))
203a9ac8606Spatrick       continue;
204a9ac8606Spatrick     // Create a copy of the tokens from the macro body, i.e. were not provided
205a9ac8606Spatrick     // by user code.
206a9ac8606Spatrick     FormatToken *New = new (Allocator.Allocate()) FormatToken;
207a9ac8606Spatrick     New->copyFrom(*Tok);
208a9ac8606Spatrick     assert(!New->MacroCtx);
209a9ac8606Spatrick     // Tokens that are not part of the user code are not formatted.
210a9ac8606Spatrick     New->MacroCtx = MacroExpansion(MR_Hidden);
211a9ac8606Spatrick     pushToken(New);
212a9ac8606Spatrick   }
213a9ac8606Spatrick   assert(Result.size() >= 1 && Result.back()->is(tok::eof));
214a9ac8606Spatrick   if (Result.size() > 1) {
215a9ac8606Spatrick     ++Result[0]->MacroCtx->StartOfExpansion;
216a9ac8606Spatrick     ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion;
217a9ac8606Spatrick   }
218a9ac8606Spatrick   return Result;
219a9ac8606Spatrick }
220a9ac8606Spatrick 
221a9ac8606Spatrick } // namespace format
222a9ac8606Spatrick } // namespace clang
223