1753f127fSDimitry Andric //===--- MacroCallReconstructor.cpp - Format C++ code -----------*- C++ -*-===//
2753f127fSDimitry Andric //
3753f127fSDimitry Andric //                     The LLVM Compiler Infrastructure
4753f127fSDimitry Andric //
5753f127fSDimitry Andric // This file is distributed under the University of Illinois Open Source
6753f127fSDimitry Andric // License. See LICENSE.TXT for details.
7753f127fSDimitry Andric //
8753f127fSDimitry Andric //===----------------------------------------------------------------------===//
9753f127fSDimitry Andric ///
10753f127fSDimitry Andric /// \file
11753f127fSDimitry Andric /// This file contains the implementation of MacroCallReconstructor, which fits
12753f127fSDimitry Andric /// an reconstructed macro call to a parsed set of UnwrappedLines.
13753f127fSDimitry Andric ///
14753f127fSDimitry Andric //===----------------------------------------------------------------------===//
15753f127fSDimitry Andric 
16753f127fSDimitry Andric #include "Macros.h"
17753f127fSDimitry Andric 
18753f127fSDimitry Andric #include "UnwrappedLineParser.h"
19753f127fSDimitry Andric #include "clang/Basic/TokenKinds.h"
20753f127fSDimitry Andric #include "llvm/ADT/DenseSet.h"
21753f127fSDimitry Andric #include "llvm/Support/Debug.h"
22753f127fSDimitry Andric #include <cassert>
23753f127fSDimitry Andric 
24753f127fSDimitry Andric #define DEBUG_TYPE "format-reconstruct"
25753f127fSDimitry Andric 
26753f127fSDimitry Andric namespace clang {
27753f127fSDimitry Andric namespace format {
28753f127fSDimitry Andric 
29753f127fSDimitry Andric // Call \p Call for each token in the unwrapped line given, passing
30753f127fSDimitry Andric // the token, its parent and whether it is the first token in the line.
31753f127fSDimitry Andric template <typename T>
forEachToken(const UnwrappedLine & Line,const T & Call,FormatToken * Parent=nullptr)32753f127fSDimitry Andric void forEachToken(const UnwrappedLine &Line, const T &Call,
33753f127fSDimitry Andric                   FormatToken *Parent = nullptr) {
34753f127fSDimitry Andric   bool First = true;
35753f127fSDimitry Andric   for (const auto &N : Line.Tokens) {
36753f127fSDimitry Andric     Call(N.Tok, Parent, First);
37753f127fSDimitry Andric     First = false;
38bdd1243dSDimitry Andric     for (const auto &Child : N.Children)
39753f127fSDimitry Andric       forEachToken(Child, Call, N.Tok);
40753f127fSDimitry Andric   }
41753f127fSDimitry Andric }
42753f127fSDimitry Andric 
MacroCallReconstructor(unsigned Level,const llvm::DenseMap<FormatToken *,std::unique_ptr<UnwrappedLine>> & ActiveExpansions)43753f127fSDimitry Andric MacroCallReconstructor::MacroCallReconstructor(
44753f127fSDimitry Andric     unsigned Level,
45753f127fSDimitry Andric     const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
46753f127fSDimitry Andric         &ActiveExpansions)
47753f127fSDimitry Andric     : Level(Level), IdToReconstructed(ActiveExpansions) {
48753f127fSDimitry Andric   Result.Tokens.push_back(std::make_unique<LineNode>());
49753f127fSDimitry Andric   ActiveReconstructedLines.push_back(&Result);
50753f127fSDimitry Andric }
51753f127fSDimitry Andric 
addLine(const UnwrappedLine & Line)52753f127fSDimitry Andric void MacroCallReconstructor::addLine(const UnwrappedLine &Line) {
53753f127fSDimitry Andric   assert(State != Finalized);
54753f127fSDimitry Andric   LLVM_DEBUG(llvm::dbgs() << "MCR: new line...\n");
55753f127fSDimitry Andric   forEachToken(Line, [&](FormatToken *Token, FormatToken *Parent, bool First) {
56753f127fSDimitry Andric     add(Token, Parent, First);
57753f127fSDimitry Andric   });
58753f127fSDimitry Andric   assert(InProgress || finished());
59753f127fSDimitry Andric }
60753f127fSDimitry Andric 
takeResult()61753f127fSDimitry Andric UnwrappedLine MacroCallReconstructor::takeResult() && {
62753f127fSDimitry Andric   finalize();
63bdd1243dSDimitry Andric   assert(Result.Tokens.size() == 1 &&
64bdd1243dSDimitry Andric          Result.Tokens.front()->Children.size() == 1);
65753f127fSDimitry Andric   UnwrappedLine Final =
66753f127fSDimitry Andric       createUnwrappedLine(*Result.Tokens.front()->Children.front(), Level);
67753f127fSDimitry Andric   assert(!Final.Tokens.empty());
68753f127fSDimitry Andric   return Final;
69753f127fSDimitry Andric }
70753f127fSDimitry Andric 
71753f127fSDimitry Andric // Reconstruct the position of the next \p Token, given its parent \p
72753f127fSDimitry Andric // ExpandedParent in the incoming unwrapped line. \p First specifies whether it
73753f127fSDimitry Andric // is the first token in a given unwrapped line.
add(FormatToken * Token,FormatToken * ExpandedParent,bool First)74753f127fSDimitry Andric void MacroCallReconstructor::add(FormatToken *Token,
75753f127fSDimitry Andric                                  FormatToken *ExpandedParent, bool First) {
76753f127fSDimitry Andric   LLVM_DEBUG(
77753f127fSDimitry Andric       llvm::dbgs() << "MCR: Token: " << Token->TokenText << ", Parent: "
78753f127fSDimitry Andric                    << (ExpandedParent ? ExpandedParent->TokenText : "<null>")
79753f127fSDimitry Andric                    << ", First: " << First << "\n");
80753f127fSDimitry Andric   // In order to be able to find the correct parent in the reconstructed token
81753f127fSDimitry Andric   // stream, we need to continue the last open reconstruction until we find the
82753f127fSDimitry Andric   // given token if it is part of the reconstructed token stream.
83753f127fSDimitry Andric   //
84753f127fSDimitry Andric   // Note that hidden tokens can be part of the reconstructed stream in nested
85753f127fSDimitry Andric   // macro calls.
86753f127fSDimitry Andric   // For example, given
87753f127fSDimitry Andric   //   #define C(x, y) x y
88753f127fSDimitry Andric   //   #define B(x) {x}
89753f127fSDimitry Andric   // And the call:
90753f127fSDimitry Andric   //   C(a, B(b))
91753f127fSDimitry Andric   // The outer macro call will be C(a, {b}), and the hidden token '}' can be
92753f127fSDimitry Andric   // found in the reconstructed token stream of that expansion level.
93753f127fSDimitry Andric   // In the expanded token stream
94753f127fSDimitry Andric   //   a {b}
95753f127fSDimitry Andric   // 'b' is a child of '{'. We need to continue the open expansion of the ','
96753f127fSDimitry Andric   // in the call of 'C' in order to correctly set the ',' as the parent of '{',
97753f127fSDimitry Andric   // so we later set the spelled token 'b' as a child of the ','.
98753f127fSDimitry Andric   if (!ActiveExpansions.empty() && Token->MacroCtx &&
99753f127fSDimitry Andric       (Token->MacroCtx->Role != MR_Hidden ||
100753f127fSDimitry Andric        ActiveExpansions.size() != Token->MacroCtx->ExpandedFrom.size())) {
101753f127fSDimitry Andric     if (/*PassedMacroComma = */ reconstructActiveCallUntil(Token))
102753f127fSDimitry Andric       First = true;
103753f127fSDimitry Andric   }
104753f127fSDimitry Andric 
105753f127fSDimitry Andric   prepareParent(ExpandedParent, First);
106753f127fSDimitry Andric 
107753f127fSDimitry Andric   if (Token->MacroCtx) {
108753f127fSDimitry Andric     // If this token was generated by a macro call, add the reconstructed
109753f127fSDimitry Andric     // equivalent of the token.
110753f127fSDimitry Andric     reconstruct(Token);
111753f127fSDimitry Andric   } else {
112753f127fSDimitry Andric     // Otherwise, we add it to the current line.
113753f127fSDimitry Andric     appendToken(Token);
114753f127fSDimitry Andric   }
115753f127fSDimitry Andric }
116753f127fSDimitry Andric 
117753f127fSDimitry Andric // Adjusts the stack of active reconstructed lines so we're ready to push
118753f127fSDimitry Andric // tokens. The tokens to be pushed are children of ExpandedParent in the
119753f127fSDimitry Andric // expanded code.
120753f127fSDimitry Andric //
121753f127fSDimitry Andric // This may entail:
122753f127fSDimitry Andric // - creating a new line, if the parent is on the active line
123753f127fSDimitry Andric // - popping active lines, if the parent is further up the stack
124753f127fSDimitry Andric //
125753f127fSDimitry Andric // Postcondition:
126753f127fSDimitry Andric // ActiveReconstructedLines.back() is the line that has \p ExpandedParent or its
127753f127fSDimitry Andric // reconstructed replacement token as a parent (when possible) - that is, the
128753f127fSDimitry Andric // last token in \c ActiveReconstructedLines[ActiveReconstructedLines.size()-2]
129753f127fSDimitry Andric // is the parent of ActiveReconstructedLines.back() in the reconstructed
130753f127fSDimitry Andric // unwrapped line.
prepareParent(FormatToken * ExpandedParent,bool NewLine)131753f127fSDimitry Andric void MacroCallReconstructor::prepareParent(FormatToken *ExpandedParent,
132753f127fSDimitry Andric                                            bool NewLine) {
133753f127fSDimitry Andric   LLVM_DEBUG({
134753f127fSDimitry Andric     llvm::dbgs() << "ParentMap:\n";
135753f127fSDimitry Andric     debugParentMap();
136753f127fSDimitry Andric   });
137753f127fSDimitry Andric   // We want to find the parent in the new unwrapped line, where the expanded
138753f127fSDimitry Andric   // parent might have been replaced during reconstruction.
139753f127fSDimitry Andric   FormatToken *Parent = getParentInResult(ExpandedParent);
140753f127fSDimitry Andric   LLVM_DEBUG(llvm::dbgs() << "MCR: New parent: "
141753f127fSDimitry Andric                           << (Parent ? Parent->TokenText : "<null>") << "\n");
142753f127fSDimitry Andric 
143753f127fSDimitry Andric   FormatToken *OpenMacroParent = nullptr;
144753f127fSDimitry Andric   if (!MacroCallStructure.empty()) {
145753f127fSDimitry Andric     // Inside a macro expansion, it is possible to lose track of the correct
146753f127fSDimitry Andric     // parent - either because it is already popped, for example because it was
147753f127fSDimitry Andric     // in a different macro argument (e.g. M({, })), or when we work on invalid
148753f127fSDimitry Andric     // code.
149753f127fSDimitry Andric     // Thus, we use the innermost macro call's parent as the parent at which
150753f127fSDimitry Andric     // we stop; this allows us to stay within the macro expansion and keeps
151753f127fSDimitry Andric     // any problems confined to the extent of the macro call.
152753f127fSDimitry Andric     OpenMacroParent =
153753f127fSDimitry Andric         getParentInResult(MacroCallStructure.back().MacroCallLParen);
154753f127fSDimitry Andric     LLVM_DEBUG(llvm::dbgs()
155753f127fSDimitry Andric                << "MacroCallLParen: "
156753f127fSDimitry Andric                << MacroCallStructure.back().MacroCallLParen->TokenText
157753f127fSDimitry Andric                << ", OpenMacroParent: "
158753f127fSDimitry Andric                << (OpenMacroParent ? OpenMacroParent->TokenText : "<null>")
159753f127fSDimitry Andric                << "\n");
160753f127fSDimitry Andric   }
161753f127fSDimitry Andric   if (NewLine ||
162753f127fSDimitry Andric       (!ActiveReconstructedLines.back()->Tokens.empty() &&
163753f127fSDimitry Andric        Parent == ActiveReconstructedLines.back()->Tokens.back()->Tok)) {
164753f127fSDimitry Andric     // If we are at the first token in a new line, we want to also
165753f127fSDimitry Andric     // create a new line in the resulting reconstructed unwrapped line.
166753f127fSDimitry Andric     while (ActiveReconstructedLines.back()->Tokens.empty() ||
167753f127fSDimitry Andric            (Parent != ActiveReconstructedLines.back()->Tokens.back()->Tok &&
168753f127fSDimitry Andric             ActiveReconstructedLines.back()->Tokens.back()->Tok !=
169753f127fSDimitry Andric                 OpenMacroParent)) {
170753f127fSDimitry Andric       ActiveReconstructedLines.pop_back();
171753f127fSDimitry Andric       assert(!ActiveReconstructedLines.empty());
172753f127fSDimitry Andric     }
173753f127fSDimitry Andric     assert(!ActiveReconstructedLines.empty());
174753f127fSDimitry Andric     ActiveReconstructedLines.back()->Tokens.back()->Children.push_back(
175753f127fSDimitry Andric         std::make_unique<ReconstructedLine>());
176753f127fSDimitry Andric     ActiveReconstructedLines.push_back(
177753f127fSDimitry Andric         &*ActiveReconstructedLines.back()->Tokens.back()->Children.back());
178753f127fSDimitry Andric   } else if (parentLine().Tokens.back()->Tok != Parent) {
179753f127fSDimitry Andric     // If we're not the first token in a new line, pop lines until we find
180753f127fSDimitry Andric     // the child of \c Parent in the stack.
181753f127fSDimitry Andric     while (Parent != parentLine().Tokens.back()->Tok &&
182753f127fSDimitry Andric            parentLine().Tokens.back()->Tok &&
183753f127fSDimitry Andric            parentLine().Tokens.back()->Tok != OpenMacroParent) {
184753f127fSDimitry Andric       ActiveReconstructedLines.pop_back();
185753f127fSDimitry Andric       assert(!ActiveReconstructedLines.empty());
186753f127fSDimitry Andric     }
187753f127fSDimitry Andric   }
188753f127fSDimitry Andric   assert(!ActiveReconstructedLines.empty());
189753f127fSDimitry Andric }
190753f127fSDimitry Andric 
191753f127fSDimitry Andric // For a given \p Parent in the incoming expanded token stream, find the
192753f127fSDimitry Andric // corresponding parent in the output.
getParentInResult(FormatToken * Parent)193753f127fSDimitry Andric FormatToken *MacroCallReconstructor::getParentInResult(FormatToken *Parent) {
194753f127fSDimitry Andric   FormatToken *Mapped = SpelledParentToReconstructedParent.lookup(Parent);
195753f127fSDimitry Andric   if (!Mapped)
196753f127fSDimitry Andric     return Parent;
197bdd1243dSDimitry Andric   for (; Mapped; Mapped = SpelledParentToReconstructedParent.lookup(Parent))
198753f127fSDimitry Andric     Parent = Mapped;
199753f127fSDimitry Andric   // If we use a different token than the parent in the expanded token stream
200753f127fSDimitry Andric   // as parent, mark it as a special parent, so the formatting code knows it
201753f127fSDimitry Andric   // needs to have its children formatted.
202753f127fSDimitry Andric   Parent->MacroParent = true;
203753f127fSDimitry Andric   return Parent;
204753f127fSDimitry Andric }
205753f127fSDimitry Andric 
206753f127fSDimitry Andric // Reconstruct a \p Token that was expanded from a macro call.
reconstruct(FormatToken * Token)207753f127fSDimitry Andric void MacroCallReconstructor::reconstruct(FormatToken *Token) {
208753f127fSDimitry Andric   assert(Token->MacroCtx);
209753f127fSDimitry Andric   // A single token can be the only result of a macro call:
210753f127fSDimitry Andric   // Given: #define ID(x, y) ;
211753f127fSDimitry Andric   // And the call: ID(<some>, <tokens>)
212753f127fSDimitry Andric   // ';' in the expanded stream will reconstruct all of ID(<some>, <tokens>).
213753f127fSDimitry Andric   if (Token->MacroCtx->StartOfExpansion) {
214753f127fSDimitry Andric     startReconstruction(Token);
215753f127fSDimitry Andric     // If the order of tokens in the expanded token stream is not the
216753f127fSDimitry Andric     // same as the order of tokens in the reconstructed stream, we need
217753f127fSDimitry Andric     // to reconstruct tokens that arrive later in the stream.
218bdd1243dSDimitry Andric     if (Token->MacroCtx->Role != MR_Hidden)
219753f127fSDimitry Andric       reconstructActiveCallUntil(Token);
220753f127fSDimitry Andric   }
221753f127fSDimitry Andric   assert(!ActiveExpansions.empty());
222753f127fSDimitry Andric   if (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE) {
223753f127fSDimitry Andric     assert(ActiveExpansions.size() == Token->MacroCtx->ExpandedFrom.size());
224753f127fSDimitry Andric     if (Token->MacroCtx->Role != MR_Hidden) {
225753f127fSDimitry Andric       // The current token in the reconstructed token stream must be the token
226753f127fSDimitry Andric       // we're looking for - we either arrive here after startReconstruction,
227753f127fSDimitry Andric       // which initiates the stream to the first token, or after
228753f127fSDimitry Andric       // continueReconstructionUntil skipped until the expected token in the
229753f127fSDimitry Andric       // reconstructed stream at the start of add(...).
230753f127fSDimitry Andric       assert(ActiveExpansions.back().SpelledI->Tok == Token);
231753f127fSDimitry Andric       processNextReconstructed();
232753f127fSDimitry Andric     } else if (!currentLine()->Tokens.empty()) {
233753f127fSDimitry Andric       // Map all hidden tokens to the last visible token in the output.
234753f127fSDimitry Andric       // If the hidden token is a parent, we'll use the last visible
235753f127fSDimitry Andric       // token as the parent of the hidden token's children.
236753f127fSDimitry Andric       SpelledParentToReconstructedParent[Token] =
237753f127fSDimitry Andric           currentLine()->Tokens.back()->Tok;
238753f127fSDimitry Andric     } else {
239753f127fSDimitry Andric       for (auto I = ActiveReconstructedLines.rbegin(),
240753f127fSDimitry Andric                 E = ActiveReconstructedLines.rend();
241753f127fSDimitry Andric            I != E; ++I) {
242753f127fSDimitry Andric         if (!(*I)->Tokens.empty()) {
243753f127fSDimitry Andric           SpelledParentToReconstructedParent[Token] = (*I)->Tokens.back()->Tok;
244753f127fSDimitry Andric           break;
245753f127fSDimitry Andric         }
246753f127fSDimitry Andric       }
247753f127fSDimitry Andric     }
248753f127fSDimitry Andric   }
249753f127fSDimitry Andric   if (Token->MacroCtx->EndOfExpansion)
250753f127fSDimitry Andric     endReconstruction(Token);
251753f127fSDimitry Andric }
252753f127fSDimitry Andric 
253753f127fSDimitry Andric // Given a \p Token that starts an expansion, reconstruct the beginning of the
254753f127fSDimitry Andric // macro call.
255753f127fSDimitry Andric // For example, given: #define ID(x) x
256753f127fSDimitry Andric // And the call: ID(int a)
257753f127fSDimitry Andric // Reconstructs: ID(
startReconstruction(FormatToken * Token)258753f127fSDimitry Andric void MacroCallReconstructor::startReconstruction(FormatToken *Token) {
259753f127fSDimitry Andric   assert(Token->MacroCtx);
260753f127fSDimitry Andric   assert(!Token->MacroCtx->ExpandedFrom.empty());
261753f127fSDimitry Andric   assert(ActiveExpansions.size() <= Token->MacroCtx->ExpandedFrom.size());
262753f127fSDimitry Andric #ifndef NDEBUG
263753f127fSDimitry Andric   // Check that the token's reconstruction stack matches our current
264753f127fSDimitry Andric   // reconstruction stack.
265753f127fSDimitry Andric   for (size_t I = 0; I < ActiveExpansions.size(); ++I) {
266753f127fSDimitry Andric     assert(ActiveExpansions[I].ID ==
267753f127fSDimitry Andric            Token->MacroCtx
268753f127fSDimitry Andric                ->ExpandedFrom[Token->MacroCtx->ExpandedFrom.size() - 1 - I]);
269753f127fSDimitry Andric   }
270753f127fSDimitry Andric #endif
271753f127fSDimitry Andric   // Start reconstruction for all calls for which this token is the first token
272753f127fSDimitry Andric   // generated by the call.
273753f127fSDimitry Andric   // Note that the token's expanded from stack is inside-to-outside, and the
274753f127fSDimitry Andric   // expansions for which this token is not the first are the outermost ones.
275753f127fSDimitry Andric   ArrayRef<FormatToken *> StartedMacros =
276bdd1243dSDimitry Andric       ArrayRef(Token->MacroCtx->ExpandedFrom)
277753f127fSDimitry Andric           .drop_back(ActiveExpansions.size());
278753f127fSDimitry Andric   assert(StartedMacros.size() == Token->MacroCtx->StartOfExpansion);
279753f127fSDimitry Andric   // We reconstruct macro calls outside-to-inside.
280753f127fSDimitry Andric   for (FormatToken *ID : llvm::reverse(StartedMacros)) {
281753f127fSDimitry Andric     // We found a macro call to be reconstructed; the next time our
282753f127fSDimitry Andric     // reconstruction stack is empty we know we finished an reconstruction.
283753f127fSDimitry Andric #ifndef NDEBUG
284753f127fSDimitry Andric     State = InProgress;
285753f127fSDimitry Andric #endif
286753f127fSDimitry Andric     // Put the reconstructed macro call's token into our reconstruction stack.
287753f127fSDimitry Andric     auto IU = IdToReconstructed.find(ID);
288753f127fSDimitry Andric     assert(IU != IdToReconstructed.end());
289753f127fSDimitry Andric     ActiveExpansions.push_back(
290753f127fSDimitry Andric         {ID, IU->second->Tokens.begin(), IU->second->Tokens.end()});
291753f127fSDimitry Andric     // Process the macro call's identifier.
292753f127fSDimitry Andric     processNextReconstructed();
293753f127fSDimitry Andric     if (ActiveExpansions.back().SpelledI == ActiveExpansions.back().SpelledE)
294753f127fSDimitry Andric       continue;
295753f127fSDimitry Andric     if (ActiveExpansions.back().SpelledI->Tok->is(tok::l_paren)) {
296753f127fSDimitry Andric       // Process the optional opening parenthesis.
297753f127fSDimitry Andric       processNextReconstructed();
298753f127fSDimitry Andric     }
299753f127fSDimitry Andric   }
300753f127fSDimitry Andric }
301753f127fSDimitry Andric 
302753f127fSDimitry Andric // Add all tokens in the reconstruction stream to the output until we find the
303753f127fSDimitry Andric // given \p Token.
reconstructActiveCallUntil(FormatToken * Token)304753f127fSDimitry Andric bool MacroCallReconstructor::reconstructActiveCallUntil(FormatToken *Token) {
305753f127fSDimitry Andric   assert(!ActiveExpansions.empty());
306753f127fSDimitry Andric   bool PassedMacroComma = false;
307753f127fSDimitry Andric   // FIXME: If Token was already expanded earlier, due to
308753f127fSDimitry Andric   // a change in order, we will not find it, but need to
309753f127fSDimitry Andric   // skip it.
310753f127fSDimitry Andric   while (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE &&
311753f127fSDimitry Andric          ActiveExpansions.back().SpelledI->Tok != Token) {
312753f127fSDimitry Andric     PassedMacroComma = processNextReconstructed() || PassedMacroComma;
313753f127fSDimitry Andric   }
314753f127fSDimitry Andric   return PassedMacroComma;
315753f127fSDimitry Andric }
316753f127fSDimitry Andric 
317753f127fSDimitry Andric // End all reconstructions for which \p Token is the final token.
endReconstruction(FormatToken * Token)318753f127fSDimitry Andric void MacroCallReconstructor::endReconstruction(FormatToken *Token) {
319753f127fSDimitry Andric   assert(Token->MacroCtx &&
320753f127fSDimitry Andric          (ActiveExpansions.size() >= Token->MacroCtx->EndOfExpansion));
321753f127fSDimitry Andric   for (size_t I = 0; I < Token->MacroCtx->EndOfExpansion; ++I) {
322*5f757f3fSDimitry Andric     LLVM_DEBUG([&] {
323*5f757f3fSDimitry Andric       // Check all remaining tokens but the final closing parenthesis and
324*5f757f3fSDimitry Andric       // optional trailing comment were already reconstructed at an inner
325*5f757f3fSDimitry Andric       // expansion level.
326753f127fSDimitry Andric       for (auto T = ActiveExpansions.back().SpelledI;
327753f127fSDimitry Andric            T != ActiveExpansions.back().SpelledE; ++T) {
328753f127fSDimitry Andric         FormatToken *Token = T->Tok;
329753f127fSDimitry Andric         bool ClosingParen = (std::next(T) == ActiveExpansions.back().SpelledE ||
330753f127fSDimitry Andric                              std::next(T)->Tok->isTrailingComment()) &&
331753f127fSDimitry Andric                             !Token->MacroCtx && Token->is(tok::r_paren);
332753f127fSDimitry Andric         bool TrailingComment = Token->isTrailingComment();
333753f127fSDimitry Andric         bool PreviousLevel =
334753f127fSDimitry Andric             Token->MacroCtx &&
335753f127fSDimitry Andric             (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size());
336bdd1243dSDimitry Andric         if (!ClosingParen && !TrailingComment && !PreviousLevel)
337753f127fSDimitry Andric           llvm::dbgs() << "At token: " << Token->TokenText << "\n";
338753f127fSDimitry Andric         // In addition to the following cases, we can also run into this
339753f127fSDimitry Andric         // when a macro call had more arguments than expected; in that case,
340*5f757f3fSDimitry Andric         // the comma and the remaining tokens in the macro call will
341*5f757f3fSDimitry Andric         // potentially end up in the line when we finish the expansion.
342753f127fSDimitry Andric         // FIXME: Add the information which arguments are unused, and assert
343753f127fSDimitry Andric         // one of the cases below plus reconstructed macro argument tokens.
344753f127fSDimitry Andric         // assert(ClosingParen || TrailingComment || PreviousLevel);
345753f127fSDimitry Andric       }
346*5f757f3fSDimitry Andric     }());
347753f127fSDimitry Andric     // Handle the remaining open tokens:
348753f127fSDimitry Andric     // - expand the closing parenthesis, if it exists, including an optional
349753f127fSDimitry Andric     //   trailing comment
350753f127fSDimitry Andric     // - handle tokens that were already reconstructed at an inner expansion
351753f127fSDimitry Andric     //   level
352753f127fSDimitry Andric     // - handle tokens when a macro call had more than the expected number of
353753f127fSDimitry Andric     //   arguments, i.e. when #define M(x) is called as M(a, b, c) we'll end
354753f127fSDimitry Andric     //   up with the sequence ", b, c)" being open at the end of the
355753f127fSDimitry Andric     //   reconstruction; we want to gracefully handle that case
356753f127fSDimitry Andric     //
357753f127fSDimitry Andric     // FIXME: See the above debug-check for what we will need to do to be
358753f127fSDimitry Andric     // able to assert this.
359753f127fSDimitry Andric     for (auto T = ActiveExpansions.back().SpelledI;
360753f127fSDimitry Andric          T != ActiveExpansions.back().SpelledE; ++T) {
361753f127fSDimitry Andric       processNextReconstructed();
362753f127fSDimitry Andric     }
363753f127fSDimitry Andric     ActiveExpansions.pop_back();
364753f127fSDimitry Andric   }
365753f127fSDimitry Andric }
366753f127fSDimitry Andric 
debugParentMap() const367753f127fSDimitry Andric void MacroCallReconstructor::debugParentMap() const {
368753f127fSDimitry Andric   llvm::DenseSet<FormatToken *> Values;
369753f127fSDimitry Andric   for (const auto &P : SpelledParentToReconstructedParent)
370753f127fSDimitry Andric     Values.insert(P.second);
371753f127fSDimitry Andric 
372753f127fSDimitry Andric   for (const auto &P : SpelledParentToReconstructedParent) {
373753f127fSDimitry Andric     if (Values.contains(P.first))
374753f127fSDimitry Andric       continue;
375753f127fSDimitry Andric     llvm::dbgs() << (P.first ? P.first->TokenText : "<null>");
376753f127fSDimitry Andric     for (auto I = SpelledParentToReconstructedParent.find(P.first),
377753f127fSDimitry Andric               E = SpelledParentToReconstructedParent.end();
378753f127fSDimitry Andric          I != E; I = SpelledParentToReconstructedParent.find(I->second)) {
379753f127fSDimitry Andric       llvm::dbgs() << " -> " << (I->second ? I->second->TokenText : "<null>");
380753f127fSDimitry Andric     }
381753f127fSDimitry Andric     llvm::dbgs() << "\n";
382753f127fSDimitry Andric   }
383753f127fSDimitry Andric }
384753f127fSDimitry Andric 
385753f127fSDimitry Andric // If visible, add the next token of the reconstructed token sequence to the
386753f127fSDimitry Andric // output. Returns whether reconstruction passed a comma that is part of a
387753f127fSDimitry Andric // macro call.
processNextReconstructed()388753f127fSDimitry Andric bool MacroCallReconstructor::processNextReconstructed() {
389753f127fSDimitry Andric   FormatToken *Token = ActiveExpansions.back().SpelledI->Tok;
390753f127fSDimitry Andric   ++ActiveExpansions.back().SpelledI;
391753f127fSDimitry Andric   if (Token->MacroCtx) {
392753f127fSDimitry Andric     // Skip tokens that are not part of the macro call.
393bdd1243dSDimitry Andric     if (Token->MacroCtx->Role == MR_Hidden)
394753f127fSDimitry Andric       return false;
395753f127fSDimitry Andric     // Skip tokens we already expanded during an inner reconstruction.
396753f127fSDimitry Andric     // For example, given: #define ID(x) {x}
397753f127fSDimitry Andric     // And the call: ID(ID(f))
398753f127fSDimitry Andric     // We get two reconstructions:
399753f127fSDimitry Andric     // ID(f) -> {f}
400753f127fSDimitry Andric     // ID({f}) -> {{f}}
401753f127fSDimitry Andric     // We reconstruct f during the first reconstruction, and skip it during the
402753f127fSDimitry Andric     // second reconstruction.
403bdd1243dSDimitry Andric     if (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size())
404753f127fSDimitry Andric       return false;
405753f127fSDimitry Andric   }
406753f127fSDimitry Andric   // Tokens that do not have a macro context are tokens in that are part of the
407753f127fSDimitry Andric   // macro call that have not taken part in expansion.
408753f127fSDimitry Andric   if (!Token->MacroCtx) {
409753f127fSDimitry Andric     // Put the parentheses and commas of a macro call into the same line;
410753f127fSDimitry Andric     // if the arguments produce new unwrapped lines, they will become children
411753f127fSDimitry Andric     // of the corresponding opening parenthesis or comma tokens in the
412753f127fSDimitry Andric     // reconstructed call.
413753f127fSDimitry Andric     if (Token->is(tok::l_paren)) {
414753f127fSDimitry Andric       MacroCallStructure.push_back(MacroCallState(
415753f127fSDimitry Andric           currentLine(), parentLine().Tokens.back()->Tok, Token));
416753f127fSDimitry Andric       // All tokens that are children of the previous line's last token in the
417753f127fSDimitry Andric       // reconstructed token stream will now be children of the l_paren token.
418753f127fSDimitry Andric       // For example, for the line containing the macro calls:
419753f127fSDimitry Andric       //   auto x = ID({ID(2)});
420753f127fSDimitry Andric       // We will build up a map <null> -> ( -> ( with the first and second
421753f127fSDimitry Andric       // l_paren of the macro call respectively. New lines that come in with a
422753f127fSDimitry Andric       // <null> parent will then become children of the l_paren token of the
423753f127fSDimitry Andric       // currently innermost macro call.
424753f127fSDimitry Andric       SpelledParentToReconstructedParent[MacroCallStructure.back()
425753f127fSDimitry Andric                                              .ParentLastToken] = Token;
426753f127fSDimitry Andric       appendToken(Token);
427753f127fSDimitry Andric       prepareParent(Token, /*NewLine=*/true);
428753f127fSDimitry Andric       Token->MacroParent = true;
429753f127fSDimitry Andric       return false;
430753f127fSDimitry Andric     }
431753f127fSDimitry Andric     if (!MacroCallStructure.empty()) {
432753f127fSDimitry Andric       if (Token->is(tok::comma)) {
433753f127fSDimitry Andric         // Make new lines inside the next argument children of the comma token.
434753f127fSDimitry Andric         SpelledParentToReconstructedParent
435753f127fSDimitry Andric             [MacroCallStructure.back().Line->Tokens.back()->Tok] = Token;
436753f127fSDimitry Andric         Token->MacroParent = true;
437753f127fSDimitry Andric         appendToken(Token, MacroCallStructure.back().Line);
438753f127fSDimitry Andric         prepareParent(Token, /*NewLine=*/true);
439753f127fSDimitry Andric         return true;
440753f127fSDimitry Andric       }
441753f127fSDimitry Andric       if (Token->is(tok::r_paren)) {
442753f127fSDimitry Andric         appendToken(Token, MacroCallStructure.back().Line);
443753f127fSDimitry Andric         SpelledParentToReconstructedParent.erase(
444753f127fSDimitry Andric             MacroCallStructure.back().ParentLastToken);
445753f127fSDimitry Andric         MacroCallStructure.pop_back();
446753f127fSDimitry Andric         return false;
447753f127fSDimitry Andric       }
448753f127fSDimitry Andric     }
449753f127fSDimitry Andric   }
450753f127fSDimitry Andric   // Note that any tokens that are tagged with MR_None have been passed as
451753f127fSDimitry Andric   // arguments to the macro that have not been expanded, for example:
452753f127fSDimitry Andric   // Given: #define ID(X) x
453753f127fSDimitry Andric   // When calling: ID(a, b)
454753f127fSDimitry Andric   // 'b' will be part of the reconstructed token stream, but tagged MR_None.
455753f127fSDimitry Andric   // Given that erroring out in this case would be disruptive, we continue
456753f127fSDimitry Andric   // pushing the (unformatted) token.
457753f127fSDimitry Andric   // FIXME: This can lead to unfortunate formatting decisions - give the user
458753f127fSDimitry Andric   // a hint that their macro definition is broken.
459753f127fSDimitry Andric   appendToken(Token);
460753f127fSDimitry Andric   return false;
461753f127fSDimitry Andric }
462753f127fSDimitry Andric 
finalize()463753f127fSDimitry Andric void MacroCallReconstructor::finalize() {
464753f127fSDimitry Andric #ifndef NDEBUG
465753f127fSDimitry Andric   assert(State != Finalized && finished());
466753f127fSDimitry Andric   State = Finalized;
467753f127fSDimitry Andric #endif
468753f127fSDimitry Andric 
469753f127fSDimitry Andric   // We created corresponding unwrapped lines for each incoming line as children
470753f127fSDimitry Andric   // the the toplevel null token.
471753f127fSDimitry Andric   assert(Result.Tokens.size() == 1 && !Result.Tokens.front()->Children.empty());
472753f127fSDimitry Andric   LLVM_DEBUG({
473753f127fSDimitry Andric     llvm::dbgs() << "Finalizing reconstructed lines:\n";
474753f127fSDimitry Andric     debug(Result, 0);
475753f127fSDimitry Andric   });
476753f127fSDimitry Andric 
477753f127fSDimitry Andric   // The first line becomes the top level line in the resulting unwrapped line.
478753f127fSDimitry Andric   LineNode &Top = *Result.Tokens.front();
479753f127fSDimitry Andric   auto *I = Top.Children.begin();
480753f127fSDimitry Andric   // Every subsequent line will become a child of the last token in the previous
481753f127fSDimitry Andric   // line, which is the token prior to the first token in the line.
482753f127fSDimitry Andric   LineNode *Last = (*I)->Tokens.back().get();
483753f127fSDimitry Andric   ++I;
484753f127fSDimitry Andric   for (auto *E = Top.Children.end(); I != E; ++I) {
485753f127fSDimitry Andric     assert(Last->Children.empty());
486753f127fSDimitry Andric     Last->Children.push_back(std::move(*I));
487753f127fSDimitry Andric 
488753f127fSDimitry Andric     // Mark the previous line's last token as generated by a macro expansion
489753f127fSDimitry Andric     // so the formatting algorithm can take that into account.
490753f127fSDimitry Andric     Last->Tok->MacroParent = true;
491753f127fSDimitry Andric 
492753f127fSDimitry Andric     Last = Last->Children.back()->Tokens.back().get();
493753f127fSDimitry Andric   }
494753f127fSDimitry Andric   Top.Children.resize(1);
495753f127fSDimitry Andric }
496753f127fSDimitry Andric 
appendToken(FormatToken * Token,ReconstructedLine * L)497753f127fSDimitry Andric void MacroCallReconstructor::appendToken(FormatToken *Token,
498753f127fSDimitry Andric                                          ReconstructedLine *L) {
499753f127fSDimitry Andric   L = L ? L : currentLine();
500753f127fSDimitry Andric   LLVM_DEBUG(llvm::dbgs() << "-> " << Token->TokenText << "\n");
501753f127fSDimitry Andric   L->Tokens.push_back(std::make_unique<LineNode>(Token));
502753f127fSDimitry Andric }
503753f127fSDimitry Andric 
504753f127fSDimitry Andric UnwrappedLine
createUnwrappedLine(const ReconstructedLine & Line,int Level)505753f127fSDimitry Andric MacroCallReconstructor::createUnwrappedLine(const ReconstructedLine &Line,
506753f127fSDimitry Andric                                             int Level) {
507753f127fSDimitry Andric   UnwrappedLine Result;
508753f127fSDimitry Andric   Result.Level = Level;
509753f127fSDimitry Andric   for (const auto &N : Line.Tokens) {
510753f127fSDimitry Andric     Result.Tokens.push_back(N->Tok);
511753f127fSDimitry Andric     UnwrappedLineNode &Current = Result.Tokens.back();
512753f127fSDimitry Andric     for (const auto &Child : N->Children) {
513753f127fSDimitry Andric       if (Child->Tokens.empty())
514753f127fSDimitry Andric         continue;
515753f127fSDimitry Andric       Current.Children.push_back(createUnwrappedLine(*Child, Level + 1));
516753f127fSDimitry Andric     }
517753f127fSDimitry Andric     if (Current.Children.size() == 1 &&
518753f127fSDimitry Andric         Current.Tok->isOneOf(tok::l_paren, tok::comma)) {
519753f127fSDimitry Andric       Result.Tokens.splice(Result.Tokens.end(),
520753f127fSDimitry Andric                            Current.Children.front().Tokens);
521753f127fSDimitry Andric       Current.Children.clear();
522753f127fSDimitry Andric     }
523753f127fSDimitry Andric   }
524753f127fSDimitry Andric   return Result;
525753f127fSDimitry Andric }
526753f127fSDimitry Andric 
debug(const ReconstructedLine & Line,int Level)527753f127fSDimitry Andric void MacroCallReconstructor::debug(const ReconstructedLine &Line, int Level) {
528753f127fSDimitry Andric   for (int i = 0; i < Level; ++i)
529753f127fSDimitry Andric     llvm::dbgs() << " ";
530753f127fSDimitry Andric   for (const auto &N : Line.Tokens) {
531753f127fSDimitry Andric     if (!N)
532753f127fSDimitry Andric       continue;
533753f127fSDimitry Andric     if (N->Tok)
534753f127fSDimitry Andric       llvm::dbgs() << N->Tok->TokenText << " ";
535753f127fSDimitry Andric     for (const auto &Child : N->Children) {
536753f127fSDimitry Andric       llvm::dbgs() << "\n";
537753f127fSDimitry Andric       debug(*Child, Level + 1);
538753f127fSDimitry Andric       for (int i = 0; i < Level; ++i)
539753f127fSDimitry Andric         llvm::dbgs() << " ";
540753f127fSDimitry Andric     }
541753f127fSDimitry Andric   }
542753f127fSDimitry Andric   llvm::dbgs() << "\n";
543753f127fSDimitry Andric }
544753f127fSDimitry Andric 
545753f127fSDimitry Andric MacroCallReconstructor::ReconstructedLine &
parentLine()546753f127fSDimitry Andric MacroCallReconstructor::parentLine() {
547753f127fSDimitry Andric   return **std::prev(std::prev(ActiveReconstructedLines.end()));
548753f127fSDimitry Andric }
549753f127fSDimitry Andric 
550753f127fSDimitry Andric MacroCallReconstructor::ReconstructedLine *
currentLine()551753f127fSDimitry Andric MacroCallReconstructor::currentLine() {
552753f127fSDimitry Andric   return ActiveReconstructedLines.back();
553753f127fSDimitry Andric }
554753f127fSDimitry Andric 
MacroCallState(MacroCallReconstructor::ReconstructedLine * Line,FormatToken * ParentLastToken,FormatToken * MacroCallLParen)555753f127fSDimitry Andric MacroCallReconstructor::MacroCallState::MacroCallState(
556753f127fSDimitry Andric     MacroCallReconstructor::ReconstructedLine *Line,
557753f127fSDimitry Andric     FormatToken *ParentLastToken, FormatToken *MacroCallLParen)
558753f127fSDimitry Andric     : Line(Line), ParentLastToken(ParentLastToken),
559753f127fSDimitry Andric       MacroCallLParen(MacroCallLParen) {
560753f127fSDimitry Andric   LLVM_DEBUG(
561753f127fSDimitry Andric       llvm::dbgs() << "ParentLastToken: "
562753f127fSDimitry Andric                    << (ParentLastToken ? ParentLastToken->TokenText : "<null>")
563753f127fSDimitry Andric                    << "\n");
564753f127fSDimitry Andric 
565753f127fSDimitry Andric   assert(MacroCallLParen->is(tok::l_paren));
566753f127fSDimitry Andric }
567753f127fSDimitry Andric 
568753f127fSDimitry Andric } // namespace format
569753f127fSDimitry Andric } // namespace clang
570