123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568 |
- //===--- MacroCallReconstructor.cpp - Format C++ code -----------*- C++ -*-===//
- //
- // The LLVM Compiler Infrastructure
- //
- // This file is distributed under the University of Illinois Open Source
- // License. See LICENSE.TXT for details.
- //
- //===----------------------------------------------------------------------===//
- ///
- /// \file
- /// This file contains the implementation of MacroCallReconstructor, which fits
- /// an reconstructed macro call to a parsed set of UnwrappedLines.
- ///
- //===----------------------------------------------------------------------===//
- #include "Macros.h"
- #include "UnwrappedLineParser.h"
- #include "clang/Basic/TokenKinds.h"
- #include "llvm/ADT/DenseSet.h"
- #include "llvm/Support/Debug.h"
- #include <cassert>
- #define DEBUG_TYPE "format-reconstruct"
- namespace clang {
- namespace format {
- // Call \p Call for each token in the unwrapped line given, passing
- // the token, its parent and whether it is the first token in the line.
- template <typename T>
- void forEachToken(const UnwrappedLine &Line, const T &Call,
- FormatToken *Parent = nullptr) {
- bool First = true;
- for (const auto &N : Line.Tokens) {
- Call(N.Tok, Parent, First);
- First = false;
- for (const auto &Child : N.Children)
- forEachToken(Child, Call, N.Tok);
- }
- }
- MacroCallReconstructor::MacroCallReconstructor(
- unsigned Level,
- const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
- &ActiveExpansions)
- : Level(Level), IdToReconstructed(ActiveExpansions) {
- Result.Tokens.push_back(std::make_unique<LineNode>());
- ActiveReconstructedLines.push_back(&Result);
- }
- void MacroCallReconstructor::addLine(const UnwrappedLine &Line) {
- assert(State != Finalized);
- LLVM_DEBUG(llvm::dbgs() << "MCR: new line...\n");
- forEachToken(Line, [&](FormatToken *Token, FormatToken *Parent, bool First) {
- add(Token, Parent, First);
- });
- assert(InProgress || finished());
- }
- UnwrappedLine MacroCallReconstructor::takeResult() && {
- finalize();
- assert(Result.Tokens.size() == 1 &&
- Result.Tokens.front()->Children.size() == 1);
- UnwrappedLine Final =
- createUnwrappedLine(*Result.Tokens.front()->Children.front(), Level);
- assert(!Final.Tokens.empty());
- return Final;
- }
- // Reconstruct the position of the next \p Token, given its parent \p
- // ExpandedParent in the incoming unwrapped line. \p First specifies whether it
- // is the first token in a given unwrapped line.
- void MacroCallReconstructor::add(FormatToken *Token,
- FormatToken *ExpandedParent, bool First) {
- LLVM_DEBUG(
- llvm::dbgs() << "MCR: Token: " << Token->TokenText << ", Parent: "
- << (ExpandedParent ? ExpandedParent->TokenText : "<null>")
- << ", First: " << First << "\n");
- // In order to be able to find the correct parent in the reconstructed token
- // stream, we need to continue the last open reconstruction until we find the
- // given token if it is part of the reconstructed token stream.
- //
- // Note that hidden tokens can be part of the reconstructed stream in nested
- // macro calls.
- // For example, given
- // #define C(x, y) x y
- // #define B(x) {x}
- // And the call:
- // C(a, B(b))
- // The outer macro call will be C(a, {b}), and the hidden token '}' can be
- // found in the reconstructed token stream of that expansion level.
- // In the expanded token stream
- // a {b}
- // 'b' is a child of '{'. We need to continue the open expansion of the ','
- // in the call of 'C' in order to correctly set the ',' as the parent of '{',
- // so we later set the spelled token 'b' as a child of the ','.
- if (!ActiveExpansions.empty() && Token->MacroCtx &&
- (Token->MacroCtx->Role != MR_Hidden ||
- ActiveExpansions.size() != Token->MacroCtx->ExpandedFrom.size())) {
- if (/*PassedMacroComma = */ reconstructActiveCallUntil(Token))
- First = true;
- }
- prepareParent(ExpandedParent, First);
- if (Token->MacroCtx) {
- // If this token was generated by a macro call, add the reconstructed
- // equivalent of the token.
- reconstruct(Token);
- } else {
- // Otherwise, we add it to the current line.
- appendToken(Token);
- }
- }
- // Adjusts the stack of active reconstructed lines so we're ready to push
- // tokens. The tokens to be pushed are children of ExpandedParent in the
- // expanded code.
- //
- // This may entail:
- // - creating a new line, if the parent is on the active line
- // - popping active lines, if the parent is further up the stack
- //
- // Postcondition:
- // ActiveReconstructedLines.back() is the line that has \p ExpandedParent or its
- // reconstructed replacement token as a parent (when possible) - that is, the
- // last token in \c ActiveReconstructedLines[ActiveReconstructedLines.size()-2]
- // is the parent of ActiveReconstructedLines.back() in the reconstructed
- // unwrapped line.
- void MacroCallReconstructor::prepareParent(FormatToken *ExpandedParent,
- bool NewLine) {
- LLVM_DEBUG({
- llvm::dbgs() << "ParentMap:\n";
- debugParentMap();
- });
- // We want to find the parent in the new unwrapped line, where the expanded
- // parent might have been replaced during reconstruction.
- FormatToken *Parent = getParentInResult(ExpandedParent);
- LLVM_DEBUG(llvm::dbgs() << "MCR: New parent: "
- << (Parent ? Parent->TokenText : "<null>") << "\n");
- FormatToken *OpenMacroParent = nullptr;
- if (!MacroCallStructure.empty()) {
- // Inside a macro expansion, it is possible to lose track of the correct
- // parent - either because it is already popped, for example because it was
- // in a different macro argument (e.g. M({, })), or when we work on invalid
- // code.
- // Thus, we use the innermost macro call's parent as the parent at which
- // we stop; this allows us to stay within the macro expansion and keeps
- // any problems confined to the extent of the macro call.
- OpenMacroParent =
- getParentInResult(MacroCallStructure.back().MacroCallLParen);
- LLVM_DEBUG(llvm::dbgs()
- << "MacroCallLParen: "
- << MacroCallStructure.back().MacroCallLParen->TokenText
- << ", OpenMacroParent: "
- << (OpenMacroParent ? OpenMacroParent->TokenText : "<null>")
- << "\n");
- }
- if (NewLine ||
- (!ActiveReconstructedLines.back()->Tokens.empty() &&
- Parent == ActiveReconstructedLines.back()->Tokens.back()->Tok)) {
- // If we are at the first token in a new line, we want to also
- // create a new line in the resulting reconstructed unwrapped line.
- while (ActiveReconstructedLines.back()->Tokens.empty() ||
- (Parent != ActiveReconstructedLines.back()->Tokens.back()->Tok &&
- ActiveReconstructedLines.back()->Tokens.back()->Tok !=
- OpenMacroParent)) {
- ActiveReconstructedLines.pop_back();
- assert(!ActiveReconstructedLines.empty());
- }
- assert(!ActiveReconstructedLines.empty());
- ActiveReconstructedLines.back()->Tokens.back()->Children.push_back(
- std::make_unique<ReconstructedLine>());
- ActiveReconstructedLines.push_back(
- &*ActiveReconstructedLines.back()->Tokens.back()->Children.back());
- } else if (parentLine().Tokens.back()->Tok != Parent) {
- // If we're not the first token in a new line, pop lines until we find
- // the child of \c Parent in the stack.
- while (Parent != parentLine().Tokens.back()->Tok &&
- parentLine().Tokens.back()->Tok &&
- parentLine().Tokens.back()->Tok != OpenMacroParent) {
- ActiveReconstructedLines.pop_back();
- assert(!ActiveReconstructedLines.empty());
- }
- }
- assert(!ActiveReconstructedLines.empty());
- }
- // For a given \p Parent in the incoming expanded token stream, find the
- // corresponding parent in the output.
- FormatToken *MacroCallReconstructor::getParentInResult(FormatToken *Parent) {
- FormatToken *Mapped = SpelledParentToReconstructedParent.lookup(Parent);
- if (!Mapped)
- return Parent;
- for (; Mapped; Mapped = SpelledParentToReconstructedParent.lookup(Parent))
- Parent = Mapped;
- // If we use a different token than the parent in the expanded token stream
- // as parent, mark it as a special parent, so the formatting code knows it
- // needs to have its children formatted.
- Parent->MacroParent = true;
- return Parent;
- }
- // Reconstruct a \p Token that was expanded from a macro call.
- void MacroCallReconstructor::reconstruct(FormatToken *Token) {
- assert(Token->MacroCtx);
- // A single token can be the only result of a macro call:
- // Given: #define ID(x, y) ;
- // And the call: ID(<some>, <tokens>)
- // ';' in the expanded stream will reconstruct all of ID(<some>, <tokens>).
- if (Token->MacroCtx->StartOfExpansion) {
- startReconstruction(Token);
- // If the order of tokens in the expanded token stream is not the
- // same as the order of tokens in the reconstructed stream, we need
- // to reconstruct tokens that arrive later in the stream.
- if (Token->MacroCtx->Role != MR_Hidden)
- reconstructActiveCallUntil(Token);
- }
- assert(!ActiveExpansions.empty());
- if (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE) {
- assert(ActiveExpansions.size() == Token->MacroCtx->ExpandedFrom.size());
- if (Token->MacroCtx->Role != MR_Hidden) {
- // The current token in the reconstructed token stream must be the token
- // we're looking for - we either arrive here after startReconstruction,
- // which initiates the stream to the first token, or after
- // continueReconstructionUntil skipped until the expected token in the
- // reconstructed stream at the start of add(...).
- assert(ActiveExpansions.back().SpelledI->Tok == Token);
- processNextReconstructed();
- } else if (!currentLine()->Tokens.empty()) {
- // Map all hidden tokens to the last visible token in the output.
- // If the hidden token is a parent, we'll use the last visible
- // token as the parent of the hidden token's children.
- SpelledParentToReconstructedParent[Token] =
- currentLine()->Tokens.back()->Tok;
- } else {
- for (auto I = ActiveReconstructedLines.rbegin(),
- E = ActiveReconstructedLines.rend();
- I != E; ++I) {
- if (!(*I)->Tokens.empty()) {
- SpelledParentToReconstructedParent[Token] = (*I)->Tokens.back()->Tok;
- break;
- }
- }
- }
- }
- if (Token->MacroCtx->EndOfExpansion)
- endReconstruction(Token);
- }
- // Given a \p Token that starts an expansion, reconstruct the beginning of the
- // macro call.
- // For example, given: #define ID(x) x
- // And the call: ID(int a)
- // Reconstructs: ID(
- void MacroCallReconstructor::startReconstruction(FormatToken *Token) {
- assert(Token->MacroCtx);
- assert(!Token->MacroCtx->ExpandedFrom.empty());
- assert(ActiveExpansions.size() <= Token->MacroCtx->ExpandedFrom.size());
- #ifndef NDEBUG
- // Check that the token's reconstruction stack matches our current
- // reconstruction stack.
- for (size_t I = 0; I < ActiveExpansions.size(); ++I) {
- assert(ActiveExpansions[I].ID ==
- Token->MacroCtx
- ->ExpandedFrom[Token->MacroCtx->ExpandedFrom.size() - 1 - I]);
- }
- #endif
- // Start reconstruction for all calls for which this token is the first token
- // generated by the call.
- // Note that the token's expanded from stack is inside-to-outside, and the
- // expansions for which this token is not the first are the outermost ones.
- ArrayRef<FormatToken *> StartedMacros =
- ArrayRef(Token->MacroCtx->ExpandedFrom)
- .drop_back(ActiveExpansions.size());
- assert(StartedMacros.size() == Token->MacroCtx->StartOfExpansion);
- // We reconstruct macro calls outside-to-inside.
- for (FormatToken *ID : llvm::reverse(StartedMacros)) {
- // We found a macro call to be reconstructed; the next time our
- // reconstruction stack is empty we know we finished an reconstruction.
- #ifndef NDEBUG
- State = InProgress;
- #endif
- // Put the reconstructed macro call's token into our reconstruction stack.
- auto IU = IdToReconstructed.find(ID);
- assert(IU != IdToReconstructed.end());
- ActiveExpansions.push_back(
- {ID, IU->second->Tokens.begin(), IU->second->Tokens.end()});
- // Process the macro call's identifier.
- processNextReconstructed();
- if (ActiveExpansions.back().SpelledI == ActiveExpansions.back().SpelledE)
- continue;
- if (ActiveExpansions.back().SpelledI->Tok->is(tok::l_paren)) {
- // Process the optional opening parenthesis.
- processNextReconstructed();
- }
- }
- }
- // Add all tokens in the reconstruction stream to the output until we find the
- // given \p Token.
- bool MacroCallReconstructor::reconstructActiveCallUntil(FormatToken *Token) {
- assert(!ActiveExpansions.empty());
- bool PassedMacroComma = false;
- // FIXME: If Token was already expanded earlier, due to
- // a change in order, we will not find it, but need to
- // skip it.
- while (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE &&
- ActiveExpansions.back().SpelledI->Tok != Token) {
- PassedMacroComma = processNextReconstructed() || PassedMacroComma;
- }
- return PassedMacroComma;
- }
- // End all reconstructions for which \p Token is the final token.
- void MacroCallReconstructor::endReconstruction(FormatToken *Token) {
- assert(Token->MacroCtx &&
- (ActiveExpansions.size() >= Token->MacroCtx->EndOfExpansion));
- for (size_t I = 0; I < Token->MacroCtx->EndOfExpansion; ++I) {
- #ifndef NDEBUG
- // Check all remaining tokens but the final closing parenthesis and optional
- // trailing comment were already reconstructed at an inner expansion level.
- for (auto T = ActiveExpansions.back().SpelledI;
- T != ActiveExpansions.back().SpelledE; ++T) {
- FormatToken *Token = T->Tok;
- bool ClosingParen = (std::next(T) == ActiveExpansions.back().SpelledE ||
- std::next(T)->Tok->isTrailingComment()) &&
- !Token->MacroCtx && Token->is(tok::r_paren);
- bool TrailingComment = Token->isTrailingComment();
- bool PreviousLevel =
- Token->MacroCtx &&
- (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size());
- if (!ClosingParen && !TrailingComment && !PreviousLevel)
- llvm::dbgs() << "At token: " << Token->TokenText << "\n";
- // In addition to the following cases, we can also run into this
- // when a macro call had more arguments than expected; in that case,
- // the comma and the remaining tokens in the macro call will potentially
- // end up in the line when we finish the expansion.
- // FIXME: Add the information which arguments are unused, and assert
- // one of the cases below plus reconstructed macro argument tokens.
- // assert(ClosingParen || TrailingComment || PreviousLevel);
- }
- #endif
- // Handle the remaining open tokens:
- // - expand the closing parenthesis, if it exists, including an optional
- // trailing comment
- // - handle tokens that were already reconstructed at an inner expansion
- // level
- // - handle tokens when a macro call had more than the expected number of
- // arguments, i.e. when #define M(x) is called as M(a, b, c) we'll end
- // up with the sequence ", b, c)" being open at the end of the
- // reconstruction; we want to gracefully handle that case
- //
- // FIXME: See the above debug-check for what we will need to do to be
- // able to assert this.
- for (auto T = ActiveExpansions.back().SpelledI;
- T != ActiveExpansions.back().SpelledE; ++T) {
- processNextReconstructed();
- }
- ActiveExpansions.pop_back();
- }
- }
- void MacroCallReconstructor::debugParentMap() const {
- llvm::DenseSet<FormatToken *> Values;
- for (const auto &P : SpelledParentToReconstructedParent)
- Values.insert(P.second);
- for (const auto &P : SpelledParentToReconstructedParent) {
- if (Values.contains(P.first))
- continue;
- llvm::dbgs() << (P.first ? P.first->TokenText : "<null>");
- for (auto I = SpelledParentToReconstructedParent.find(P.first),
- E = SpelledParentToReconstructedParent.end();
- I != E; I = SpelledParentToReconstructedParent.find(I->second)) {
- llvm::dbgs() << " -> " << (I->second ? I->second->TokenText : "<null>");
- }
- llvm::dbgs() << "\n";
- }
- }
- // If visible, add the next token of the reconstructed token sequence to the
- // output. Returns whether reconstruction passed a comma that is part of a
- // macro call.
- bool MacroCallReconstructor::processNextReconstructed() {
- FormatToken *Token = ActiveExpansions.back().SpelledI->Tok;
- ++ActiveExpansions.back().SpelledI;
- if (Token->MacroCtx) {
- // Skip tokens that are not part of the macro call.
- if (Token->MacroCtx->Role == MR_Hidden)
- return false;
- // Skip tokens we already expanded during an inner reconstruction.
- // For example, given: #define ID(x) {x}
- // And the call: ID(ID(f))
- // We get two reconstructions:
- // ID(f) -> {f}
- // ID({f}) -> {{f}}
- // We reconstruct f during the first reconstruction, and skip it during the
- // second reconstruction.
- if (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size())
- return false;
- }
- // Tokens that do not have a macro context are tokens in that are part of the
- // macro call that have not taken part in expansion.
- if (!Token->MacroCtx) {
- // Put the parentheses and commas of a macro call into the same line;
- // if the arguments produce new unwrapped lines, they will become children
- // of the corresponding opening parenthesis or comma tokens in the
- // reconstructed call.
- if (Token->is(tok::l_paren)) {
- MacroCallStructure.push_back(MacroCallState(
- currentLine(), parentLine().Tokens.back()->Tok, Token));
- // All tokens that are children of the previous line's last token in the
- // reconstructed token stream will now be children of the l_paren token.
- // For example, for the line containing the macro calls:
- // auto x = ID({ID(2)});
- // We will build up a map <null> -> ( -> ( with the first and second
- // l_paren of the macro call respectively. New lines that come in with a
- // <null> parent will then become children of the l_paren token of the
- // currently innermost macro call.
- SpelledParentToReconstructedParent[MacroCallStructure.back()
- .ParentLastToken] = Token;
- appendToken(Token);
- prepareParent(Token, /*NewLine=*/true);
- Token->MacroParent = true;
- return false;
- }
- if (!MacroCallStructure.empty()) {
- if (Token->is(tok::comma)) {
- // Make new lines inside the next argument children of the comma token.
- SpelledParentToReconstructedParent
- [MacroCallStructure.back().Line->Tokens.back()->Tok] = Token;
- Token->MacroParent = true;
- appendToken(Token, MacroCallStructure.back().Line);
- prepareParent(Token, /*NewLine=*/true);
- return true;
- }
- if (Token->is(tok::r_paren)) {
- appendToken(Token, MacroCallStructure.back().Line);
- SpelledParentToReconstructedParent.erase(
- MacroCallStructure.back().ParentLastToken);
- MacroCallStructure.pop_back();
- return false;
- }
- }
- }
- // Note that any tokens that are tagged with MR_None have been passed as
- // arguments to the macro that have not been expanded, for example:
- // Given: #define ID(X) x
- // When calling: ID(a, b)
- // 'b' will be part of the reconstructed token stream, but tagged MR_None.
- // Given that erroring out in this case would be disruptive, we continue
- // pushing the (unformatted) token.
- // FIXME: This can lead to unfortunate formatting decisions - give the user
- // a hint that their macro definition is broken.
- appendToken(Token);
- return false;
- }
- void MacroCallReconstructor::finalize() {
- #ifndef NDEBUG
- assert(State != Finalized && finished());
- State = Finalized;
- #endif
- // We created corresponding unwrapped lines for each incoming line as children
- // the the toplevel null token.
- assert(Result.Tokens.size() == 1 && !Result.Tokens.front()->Children.empty());
- LLVM_DEBUG({
- llvm::dbgs() << "Finalizing reconstructed lines:\n";
- debug(Result, 0);
- });
- // The first line becomes the top level line in the resulting unwrapped line.
- LineNode &Top = *Result.Tokens.front();
- auto *I = Top.Children.begin();
- // Every subsequent line will become a child of the last token in the previous
- // line, which is the token prior to the first token in the line.
- LineNode *Last = (*I)->Tokens.back().get();
- ++I;
- for (auto *E = Top.Children.end(); I != E; ++I) {
- assert(Last->Children.empty());
- Last->Children.push_back(std::move(*I));
- // Mark the previous line's last token as generated by a macro expansion
- // so the formatting algorithm can take that into account.
- Last->Tok->MacroParent = true;
- Last = Last->Children.back()->Tokens.back().get();
- }
- Top.Children.resize(1);
- }
- void MacroCallReconstructor::appendToken(FormatToken *Token,
- ReconstructedLine *L) {
- L = L ? L : currentLine();
- LLVM_DEBUG(llvm::dbgs() << "-> " << Token->TokenText << "\n");
- L->Tokens.push_back(std::make_unique<LineNode>(Token));
- }
- UnwrappedLine
- MacroCallReconstructor::createUnwrappedLine(const ReconstructedLine &Line,
- int Level) {
- UnwrappedLine Result;
- Result.Level = Level;
- for (const auto &N : Line.Tokens) {
- Result.Tokens.push_back(N->Tok);
- UnwrappedLineNode &Current = Result.Tokens.back();
- for (const auto &Child : N->Children) {
- if (Child->Tokens.empty())
- continue;
- Current.Children.push_back(createUnwrappedLine(*Child, Level + 1));
- }
- if (Current.Children.size() == 1 &&
- Current.Tok->isOneOf(tok::l_paren, tok::comma)) {
- Result.Tokens.splice(Result.Tokens.end(),
- Current.Children.front().Tokens);
- Current.Children.clear();
- }
- }
- return Result;
- }
- void MacroCallReconstructor::debug(const ReconstructedLine &Line, int Level) {
- for (int i = 0; i < Level; ++i)
- llvm::dbgs() << " ";
- for (const auto &N : Line.Tokens) {
- if (!N)
- continue;
- if (N->Tok)
- llvm::dbgs() << N->Tok->TokenText << " ";
- for (const auto &Child : N->Children) {
- llvm::dbgs() << "\n";
- debug(*Child, Level + 1);
- for (int i = 0; i < Level; ++i)
- llvm::dbgs() << " ";
- }
- }
- llvm::dbgs() << "\n";
- }
- MacroCallReconstructor::ReconstructedLine &
- MacroCallReconstructor::parentLine() {
- return **std::prev(std::prev(ActiveReconstructedLines.end()));
- }
- MacroCallReconstructor::ReconstructedLine *
- MacroCallReconstructor::currentLine() {
- return ActiveReconstructedLines.back();
- }
- MacroCallReconstructor::MacroCallState::MacroCallState(
- MacroCallReconstructor::ReconstructedLine *Line,
- FormatToken *ParentLastToken, FormatToken *MacroCallLParen)
- : Line(Line), ParentLastToken(ParentLastToken),
- MacroCallLParen(MacroCallLParen) {
- LLVM_DEBUG(
- llvm::dbgs() << "ParentLastToken: "
- << (ParentLastToken ? ParentLastToken->TokenText : "<null>")
- << "\n");
- assert(MacroCallLParen->is(tok::l_paren));
- }
- } // namespace format
- } // namespace clang
|