//===--- MacroCallReconstructor.cpp - Format C++ code -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// /// /// \file /// This file contains the implementation of MacroCallReconstructor, which fits /// an reconstructed macro call to a parsed set of UnwrappedLines. /// //===----------------------------------------------------------------------===// #include "Macros.h" #include "UnwrappedLineParser.h" #include "clang/Basic/TokenKinds.h" #include "llvm/ADT/DenseSet.h" #include "llvm/Support/Debug.h" #include #define DEBUG_TYPE "format-reconstruct" namespace clang { namespace format { // Call \p Call for each token in the unwrapped line given, passing // the token, its parent and whether it is the first token in the line. template void forEachToken(const UnwrappedLine &Line, const T &Call, FormatToken *Parent = nullptr) { bool First = true; for (const auto &N : Line.Tokens) { Call(N.Tok, Parent, First); First = false; for (const auto &Child : N.Children) forEachToken(Child, Call, N.Tok); } } MacroCallReconstructor::MacroCallReconstructor( unsigned Level, const llvm::DenseMap> &ActiveExpansions) : Level(Level), IdToReconstructed(ActiveExpansions) { Result.Tokens.push_back(std::make_unique()); ActiveReconstructedLines.push_back(&Result); } void MacroCallReconstructor::addLine(const UnwrappedLine &Line) { assert(State != Finalized); LLVM_DEBUG(llvm::dbgs() << "MCR: new line...\n"); forEachToken(Line, [&](FormatToken *Token, FormatToken *Parent, bool First) { add(Token, Parent, First); }); assert(InProgress || finished()); } UnwrappedLine MacroCallReconstructor::takeResult() && { finalize(); assert(Result.Tokens.size() == 1 && Result.Tokens.front()->Children.size() == 1); UnwrappedLine Final = createUnwrappedLine(*Result.Tokens.front()->Children.front(), Level); assert(!Final.Tokens.empty()); return Final; } // Reconstruct the position of the next \p Token, given its parent \p // ExpandedParent in the incoming unwrapped line. \p First specifies whether it // is the first token in a given unwrapped line. void MacroCallReconstructor::add(FormatToken *Token, FormatToken *ExpandedParent, bool First) { LLVM_DEBUG( llvm::dbgs() << "MCR: Token: " << Token->TokenText << ", Parent: " << (ExpandedParent ? ExpandedParent->TokenText : "") << ", First: " << First << "\n"); // In order to be able to find the correct parent in the reconstructed token // stream, we need to continue the last open reconstruction until we find the // given token if it is part of the reconstructed token stream. // // Note that hidden tokens can be part of the reconstructed stream in nested // macro calls. // For example, given // #define C(x, y) x y // #define B(x) {x} // And the call: // C(a, B(b)) // The outer macro call will be C(a, {b}), and the hidden token '}' can be // found in the reconstructed token stream of that expansion level. // In the expanded token stream // a {b} // 'b' is a child of '{'. We need to continue the open expansion of the ',' // in the call of 'C' in order to correctly set the ',' as the parent of '{', // so we later set the spelled token 'b' as a child of the ','. if (!ActiveExpansions.empty() && Token->MacroCtx && (Token->MacroCtx->Role != MR_Hidden || ActiveExpansions.size() != Token->MacroCtx->ExpandedFrom.size())) { if (/*PassedMacroComma = */ reconstructActiveCallUntil(Token)) First = true; } prepareParent(ExpandedParent, First); if (Token->MacroCtx) { // If this token was generated by a macro call, add the reconstructed // equivalent of the token. reconstruct(Token); } else { // Otherwise, we add it to the current line. appendToken(Token); } } // Adjusts the stack of active reconstructed lines so we're ready to push // tokens. The tokens to be pushed are children of ExpandedParent in the // expanded code. // // This may entail: // - creating a new line, if the parent is on the active line // - popping active lines, if the parent is further up the stack // // Postcondition: // ActiveReconstructedLines.back() is the line that has \p ExpandedParent or its // reconstructed replacement token as a parent (when possible) - that is, the // last token in \c ActiveReconstructedLines[ActiveReconstructedLines.size()-2] // is the parent of ActiveReconstructedLines.back() in the reconstructed // unwrapped line. void MacroCallReconstructor::prepareParent(FormatToken *ExpandedParent, bool NewLine) { LLVM_DEBUG({ llvm::dbgs() << "ParentMap:\n"; debugParentMap(); }); // We want to find the parent in the new unwrapped line, where the expanded // parent might have been replaced during reconstruction. FormatToken *Parent = getParentInResult(ExpandedParent); LLVM_DEBUG(llvm::dbgs() << "MCR: New parent: " << (Parent ? Parent->TokenText : "") << "\n"); FormatToken *OpenMacroParent = nullptr; if (!MacroCallStructure.empty()) { // Inside a macro expansion, it is possible to lose track of the correct // parent - either because it is already popped, for example because it was // in a different macro argument (e.g. M({, })), or when we work on invalid // code. // Thus, we use the innermost macro call's parent as the parent at which // we stop; this allows us to stay within the macro expansion and keeps // any problems confined to the extent of the macro call. OpenMacroParent = getParentInResult(MacroCallStructure.back().MacroCallLParen); LLVM_DEBUG(llvm::dbgs() << "MacroCallLParen: " << MacroCallStructure.back().MacroCallLParen->TokenText << ", OpenMacroParent: " << (OpenMacroParent ? OpenMacroParent->TokenText : "") << "\n"); } if (NewLine || (!ActiveReconstructedLines.back()->Tokens.empty() && Parent == ActiveReconstructedLines.back()->Tokens.back()->Tok)) { // If we are at the first token in a new line, we want to also // create a new line in the resulting reconstructed unwrapped line. while (ActiveReconstructedLines.back()->Tokens.empty() || (Parent != ActiveReconstructedLines.back()->Tokens.back()->Tok && ActiveReconstructedLines.back()->Tokens.back()->Tok != OpenMacroParent)) { ActiveReconstructedLines.pop_back(); assert(!ActiveReconstructedLines.empty()); } assert(!ActiveReconstructedLines.empty()); ActiveReconstructedLines.back()->Tokens.back()->Children.push_back( std::make_unique()); ActiveReconstructedLines.push_back( &*ActiveReconstructedLines.back()->Tokens.back()->Children.back()); } else if (parentLine().Tokens.back()->Tok != Parent) { // If we're not the first token in a new line, pop lines until we find // the child of \c Parent in the stack. while (Parent != parentLine().Tokens.back()->Tok && parentLine().Tokens.back()->Tok && parentLine().Tokens.back()->Tok != OpenMacroParent) { ActiveReconstructedLines.pop_back(); assert(!ActiveReconstructedLines.empty()); } } assert(!ActiveReconstructedLines.empty()); } // For a given \p Parent in the incoming expanded token stream, find the // corresponding parent in the output. FormatToken *MacroCallReconstructor::getParentInResult(FormatToken *Parent) { FormatToken *Mapped = SpelledParentToReconstructedParent.lookup(Parent); if (!Mapped) return Parent; for (; Mapped; Mapped = SpelledParentToReconstructedParent.lookup(Parent)) Parent = Mapped; // If we use a different token than the parent in the expanded token stream // as parent, mark it as a special parent, so the formatting code knows it // needs to have its children formatted. Parent->MacroParent = true; return Parent; } // Reconstruct a \p Token that was expanded from a macro call. void MacroCallReconstructor::reconstruct(FormatToken *Token) { assert(Token->MacroCtx); // A single token can be the only result of a macro call: // Given: #define ID(x, y) ; // And the call: ID(, ) // ';' in the expanded stream will reconstruct all of ID(, ). if (Token->MacroCtx->StartOfExpansion) { startReconstruction(Token); // If the order of tokens in the expanded token stream is not the // same as the order of tokens in the reconstructed stream, we need // to reconstruct tokens that arrive later in the stream. if (Token->MacroCtx->Role != MR_Hidden) reconstructActiveCallUntil(Token); } assert(!ActiveExpansions.empty()); if (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE) { assert(ActiveExpansions.size() == Token->MacroCtx->ExpandedFrom.size()); if (Token->MacroCtx->Role != MR_Hidden) { // The current token in the reconstructed token stream must be the token // we're looking for - we either arrive here after startReconstruction, // which initiates the stream to the first token, or after // continueReconstructionUntil skipped until the expected token in the // reconstructed stream at the start of add(...). assert(ActiveExpansions.back().SpelledI->Tok == Token); processNextReconstructed(); } else if (!currentLine()->Tokens.empty()) { // Map all hidden tokens to the last visible token in the output. // If the hidden token is a parent, we'll use the last visible // token as the parent of the hidden token's children. SpelledParentToReconstructedParent[Token] = currentLine()->Tokens.back()->Tok; } else { for (auto I = ActiveReconstructedLines.rbegin(), E = ActiveReconstructedLines.rend(); I != E; ++I) { if (!(*I)->Tokens.empty()) { SpelledParentToReconstructedParent[Token] = (*I)->Tokens.back()->Tok; break; } } } } if (Token->MacroCtx->EndOfExpansion) endReconstruction(Token); } // Given a \p Token that starts an expansion, reconstruct the beginning of the // macro call. // For example, given: #define ID(x) x // And the call: ID(int a) // Reconstructs: ID( void MacroCallReconstructor::startReconstruction(FormatToken *Token) { assert(Token->MacroCtx); assert(!Token->MacroCtx->ExpandedFrom.empty()); assert(ActiveExpansions.size() <= Token->MacroCtx->ExpandedFrom.size()); #ifndef NDEBUG // Check that the token's reconstruction stack matches our current // reconstruction stack. for (size_t I = 0; I < ActiveExpansions.size(); ++I) { assert(ActiveExpansions[I].ID == Token->MacroCtx ->ExpandedFrom[Token->MacroCtx->ExpandedFrom.size() - 1 - I]); } #endif // Start reconstruction for all calls for which this token is the first token // generated by the call. // Note that the token's expanded from stack is inside-to-outside, and the // expansions for which this token is not the first are the outermost ones. ArrayRef StartedMacros = ArrayRef(Token->MacroCtx->ExpandedFrom) .drop_back(ActiveExpansions.size()); assert(StartedMacros.size() == Token->MacroCtx->StartOfExpansion); // We reconstruct macro calls outside-to-inside. for (FormatToken *ID : llvm::reverse(StartedMacros)) { // We found a macro call to be reconstructed; the next time our // reconstruction stack is empty we know we finished an reconstruction. #ifndef NDEBUG State = InProgress; #endif // Put the reconstructed macro call's token into our reconstruction stack. auto IU = IdToReconstructed.find(ID); assert(IU != IdToReconstructed.end()); ActiveExpansions.push_back( {ID, IU->second->Tokens.begin(), IU->second->Tokens.end()}); // Process the macro call's identifier. processNextReconstructed(); if (ActiveExpansions.back().SpelledI == ActiveExpansions.back().SpelledE) continue; if (ActiveExpansions.back().SpelledI->Tok->is(tok::l_paren)) { // Process the optional opening parenthesis. processNextReconstructed(); } } } // Add all tokens in the reconstruction stream to the output until we find the // given \p Token. bool MacroCallReconstructor::reconstructActiveCallUntil(FormatToken *Token) { assert(!ActiveExpansions.empty()); bool PassedMacroComma = false; // FIXME: If Token was already expanded earlier, due to // a change in order, we will not find it, but need to // skip it. while (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE && ActiveExpansions.back().SpelledI->Tok != Token) { PassedMacroComma = processNextReconstructed() || PassedMacroComma; } return PassedMacroComma; } // End all reconstructions for which \p Token is the final token. void MacroCallReconstructor::endReconstruction(FormatToken *Token) { assert(Token->MacroCtx && (ActiveExpansions.size() >= Token->MacroCtx->EndOfExpansion)); for (size_t I = 0; I < Token->MacroCtx->EndOfExpansion; ++I) { #ifndef NDEBUG // Check all remaining tokens but the final closing parenthesis and optional // trailing comment were already reconstructed at an inner expansion level. for (auto T = ActiveExpansions.back().SpelledI; T != ActiveExpansions.back().SpelledE; ++T) { FormatToken *Token = T->Tok; bool ClosingParen = (std::next(T) == ActiveExpansions.back().SpelledE || std::next(T)->Tok->isTrailingComment()) && !Token->MacroCtx && Token->is(tok::r_paren); bool TrailingComment = Token->isTrailingComment(); bool PreviousLevel = Token->MacroCtx && (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size()); if (!ClosingParen && !TrailingComment && !PreviousLevel) llvm::dbgs() << "At token: " << Token->TokenText << "\n"; // In addition to the following cases, we can also run into this // when a macro call had more arguments than expected; in that case, // the comma and the remaining tokens in the macro call will potentially // end up in the line when we finish the expansion. // FIXME: Add the information which arguments are unused, and assert // one of the cases below plus reconstructed macro argument tokens. // assert(ClosingParen || TrailingComment || PreviousLevel); } #endif // Handle the remaining open tokens: // - expand the closing parenthesis, if it exists, including an optional // trailing comment // - handle tokens that were already reconstructed at an inner expansion // level // - handle tokens when a macro call had more than the expected number of // arguments, i.e. when #define M(x) is called as M(a, b, c) we'll end // up with the sequence ", b, c)" being open at the end of the // reconstruction; we want to gracefully handle that case // // FIXME: See the above debug-check for what we will need to do to be // able to assert this. for (auto T = ActiveExpansions.back().SpelledI; T != ActiveExpansions.back().SpelledE; ++T) { processNextReconstructed(); } ActiveExpansions.pop_back(); } } void MacroCallReconstructor::debugParentMap() const { llvm::DenseSet Values; for (const auto &P : SpelledParentToReconstructedParent) Values.insert(P.second); for (const auto &P : SpelledParentToReconstructedParent) { if (Values.contains(P.first)) continue; llvm::dbgs() << (P.first ? P.first->TokenText : ""); for (auto I = SpelledParentToReconstructedParent.find(P.first), E = SpelledParentToReconstructedParent.end(); I != E; I = SpelledParentToReconstructedParent.find(I->second)) { llvm::dbgs() << " -> " << (I->second ? I->second->TokenText : ""); } llvm::dbgs() << "\n"; } } // If visible, add the next token of the reconstructed token sequence to the // output. Returns whether reconstruction passed a comma that is part of a // macro call. bool MacroCallReconstructor::processNextReconstructed() { FormatToken *Token = ActiveExpansions.back().SpelledI->Tok; ++ActiveExpansions.back().SpelledI; if (Token->MacroCtx) { // Skip tokens that are not part of the macro call. if (Token->MacroCtx->Role == MR_Hidden) return false; // Skip tokens we already expanded during an inner reconstruction. // For example, given: #define ID(x) {x} // And the call: ID(ID(f)) // We get two reconstructions: // ID(f) -> {f} // ID({f}) -> {{f}} // We reconstruct f during the first reconstruction, and skip it during the // second reconstruction. if (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size()) return false; } // Tokens that do not have a macro context are tokens in that are part of the // macro call that have not taken part in expansion. if (!Token->MacroCtx) { // Put the parentheses and commas of a macro call into the same line; // if the arguments produce new unwrapped lines, they will become children // of the corresponding opening parenthesis or comma tokens in the // reconstructed call. if (Token->is(tok::l_paren)) { MacroCallStructure.push_back(MacroCallState( currentLine(), parentLine().Tokens.back()->Tok, Token)); // All tokens that are children of the previous line's last token in the // reconstructed token stream will now be children of the l_paren token. // For example, for the line containing the macro calls: // auto x = ID({ID(2)}); // We will build up a map -> ( -> ( with the first and second // l_paren of the macro call respectively. New lines that come in with a // parent will then become children of the l_paren token of the // currently innermost macro call. SpelledParentToReconstructedParent[MacroCallStructure.back() .ParentLastToken] = Token; appendToken(Token); prepareParent(Token, /*NewLine=*/true); Token->MacroParent = true; return false; } if (!MacroCallStructure.empty()) { if (Token->is(tok::comma)) { // Make new lines inside the next argument children of the comma token. SpelledParentToReconstructedParent [MacroCallStructure.back().Line->Tokens.back()->Tok] = Token; Token->MacroParent = true; appendToken(Token, MacroCallStructure.back().Line); prepareParent(Token, /*NewLine=*/true); return true; } if (Token->is(tok::r_paren)) { appendToken(Token, MacroCallStructure.back().Line); SpelledParentToReconstructedParent.erase( MacroCallStructure.back().ParentLastToken); MacroCallStructure.pop_back(); return false; } } } // Note that any tokens that are tagged with MR_None have been passed as // arguments to the macro that have not been expanded, for example: // Given: #define ID(X) x // When calling: ID(a, b) // 'b' will be part of the reconstructed token stream, but tagged MR_None. // Given that erroring out in this case would be disruptive, we continue // pushing the (unformatted) token. // FIXME: This can lead to unfortunate formatting decisions - give the user // a hint that their macro definition is broken. appendToken(Token); return false; } void MacroCallReconstructor::finalize() { #ifndef NDEBUG assert(State != Finalized && finished()); State = Finalized; #endif // We created corresponding unwrapped lines for each incoming line as children // the the toplevel null token. assert(Result.Tokens.size() == 1 && !Result.Tokens.front()->Children.empty()); LLVM_DEBUG({ llvm::dbgs() << "Finalizing reconstructed lines:\n"; debug(Result, 0); }); // The first line becomes the top level line in the resulting unwrapped line. LineNode &Top = *Result.Tokens.front(); auto *I = Top.Children.begin(); // Every subsequent line will become a child of the last token in the previous // line, which is the token prior to the first token in the line. LineNode *Last = (*I)->Tokens.back().get(); ++I; for (auto *E = Top.Children.end(); I != E; ++I) { assert(Last->Children.empty()); Last->Children.push_back(std::move(*I)); // Mark the previous line's last token as generated by a macro expansion // so the formatting algorithm can take that into account. Last->Tok->MacroParent = true; Last = Last->Children.back()->Tokens.back().get(); } Top.Children.resize(1); } void MacroCallReconstructor::appendToken(FormatToken *Token, ReconstructedLine *L) { L = L ? L : currentLine(); LLVM_DEBUG(llvm::dbgs() << "-> " << Token->TokenText << "\n"); L->Tokens.push_back(std::make_unique(Token)); } UnwrappedLine MacroCallReconstructor::createUnwrappedLine(const ReconstructedLine &Line, int Level) { UnwrappedLine Result; Result.Level = Level; for (const auto &N : Line.Tokens) { Result.Tokens.push_back(N->Tok); UnwrappedLineNode &Current = Result.Tokens.back(); for (const auto &Child : N->Children) { if (Child->Tokens.empty()) continue; Current.Children.push_back(createUnwrappedLine(*Child, Level + 1)); } if (Current.Children.size() == 1 && Current.Tok->isOneOf(tok::l_paren, tok::comma)) { Result.Tokens.splice(Result.Tokens.end(), Current.Children.front().Tokens); Current.Children.clear(); } } return Result; } void MacroCallReconstructor::debug(const ReconstructedLine &Line, int Level) { for (int i = 0; i < Level; ++i) llvm::dbgs() << " "; for (const auto &N : Line.Tokens) { if (!N) continue; if (N->Tok) llvm::dbgs() << N->Tok->TokenText << " "; for (const auto &Child : N->Children) { llvm::dbgs() << "\n"; debug(*Child, Level + 1); for (int i = 0; i < Level; ++i) llvm::dbgs() << " "; } } llvm::dbgs() << "\n"; } MacroCallReconstructor::ReconstructedLine & MacroCallReconstructor::parentLine() { return **std::prev(std::prev(ActiveReconstructedLines.end())); } MacroCallReconstructor::ReconstructedLine * MacroCallReconstructor::currentLine() { return ActiveReconstructedLines.back(); } MacroCallReconstructor::MacroCallState::MacroCallState( MacroCallReconstructor::ReconstructedLine *Line, FormatToken *ParentLastToken, FormatToken *MacroCallLParen) : Line(Line), ParentLastToken(ParentLastToken), MacroCallLParen(MacroCallLParen) { LLVM_DEBUG( llvm::dbgs() << "ParentLastToken: " << (ParentLastToken ? ParentLastToken->TokenText : "") << "\n"); assert(MacroCallLParen->is(tok::l_paren)); } } // namespace format } // namespace clang