123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381 |
- //===--- Macros.h - Format C++ code -----------------------------*- C++ -*-===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- ///
- /// \file
- /// This file contains the main building blocks of macro support in
- /// clang-format.
- ///
- /// In order to not violate the requirement that clang-format can format files
- /// in isolation, clang-format's macro support uses expansions users provide
- /// as part of clang-format's style configuration.
- ///
- /// Macro definitions are of the form "MACRO(p1, p2)=p1 + p2", but only support
- /// one level of expansion (\see MacroExpander for a full description of what
- /// is supported).
- ///
- /// As part of parsing, clang-format uses the MacroExpander to expand the
- /// spelled token streams into expanded token streams when it encounters a
- /// macro call. The UnwrappedLineParser continues to parse UnwrappedLines
- /// from the expanded token stream.
- /// After the expanded unwrapped lines are parsed, the MacroCallReconstructor
- /// matches the spelled token stream into unwrapped lines that best resemble the
- /// structure of the expanded unwrapped lines. These reconstructed unwrapped
- /// lines are aliasing the tokens in the expanded token stream, so that token
- /// annotations will be reused when formatting the spelled macro calls.
- ///
- /// When formatting, clang-format annotates and formats the expanded unwrapped
- /// lines first, determining the token types. Next, it formats the spelled
- /// unwrapped lines, keeping the token types fixed, while allowing other
- /// formatting decisions to change.
- ///
- //===----------------------------------------------------------------------===//
- #ifndef CLANG_LIB_FORMAT_MACROS_H
- #define CLANG_LIB_FORMAT_MACROS_H
- #include <list>
- #include <map>
- #include <string>
- #include <vector>
- #include "FormatToken.h"
- #include "llvm/ADT/ArrayRef.h"
- #include "llvm/ADT/DenseMap.h"
- #include "llvm/ADT/SmallVector.h"
- #include "llvm/ADT/StringRef.h"
- namespace clang {
- namespace format {
- struct UnwrappedLine;
- struct UnwrappedLineNode;
- /// Takes a set of macro definitions as strings and allows expanding calls to
- /// those macros.
- ///
- /// For example:
- /// Definition: A(x, y)=x + y
- /// Call : A(int a = 1, 2)
- /// Expansion : int a = 1 + 2
- ///
- /// Expansion does not check arity of the definition.
- /// If fewer arguments than expected are provided, the remaining parameters
- /// are considered empty:
- /// Call : A(a)
- /// Expansion: a +
- /// If more arguments than expected are provided, they will be discarded.
- ///
- /// The expander does not support:
- /// - recursive expansion
- /// - stringification
- /// - concatenation
- /// - variadic macros
- ///
- /// Furthermore, only a single expansion of each macro argument is supported,
- /// so that we cannot get conflicting formatting decisions from different
- /// expansions. Later occurrences of the same parameter are left unexpanded:
- ///   Definition: A(x)=x+x
- ///   Call      : A(id)
- ///   Expansion : id+x
- ///
- class MacroExpander {
- public:
- using ArgsList = llvm::ArrayRef<llvm::SmallVector<FormatToken *, 8>>; // One token list per positional macro argument.
- /// Construct a macro expander from a set of macro definitions.
- /// Macro definitions must be encoded as UTF-8.
- ///
- /// Each entry in \p Macros must conform to the following simple
- /// macro-definition language:
- ///   <definition> ::= <id> <expansion> | <id> "(" <params> ")" <expansion>
- ///   <params>     ::= <id-list> | ""
- ///   <id-list>    ::= <id> | <id> "," <params>
- ///   <expansion>  ::= "=" <tail> | <eof>
- ///   <tail>       ::= <tok> <tail> | <eof>
- ///
- /// Macros that cannot be parsed will be silently discarded.
- /// NOTE(review): the remaining arguments match reference members of this class, so they presumably must outlive the expander - confirm.
- MacroExpander(const std::vector<std::string> &Macros,
- clang::SourceManager &SourceMgr, const FormatStyle &Style,
- llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
- IdentifierTable &IdentTable);
- ~MacroExpander();
- /// Returns whether a macro \p Name is defined.
- bool defined(llvm::StringRef Name) const;
- /// Returns whether the macro \p Name has no arguments and should not consume
- /// subsequent parentheses.
- bool objectLike(llvm::StringRef Name) const;
- /// Returns the expanded stream of format tokens for \p ID, where
- /// each element in \p Args is a positional argument to the macro call.
- llvm::SmallVector<FormatToken *, 8> expand(FormatToken *ID,
- ArgsList Args) const;
- private:
- struct Definition; // Defined out of line; value type of the Definitions map below.
- class DefinitionParser; // Defined out of line.
- void parseDefinition(const std::string &Macro); // NOTE(review): presumably parses one entry of Macros into Definitions - confirm.
- clang::SourceManager &SourceMgr; // Non-owning reference.
- const FormatStyle &Style; // Non-owning reference.
- llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator; // Non-owning reference to a FormatToken allocator.
- IdentifierTable &IdentTable; // Non-owning reference.
- SmallVector<std::unique_ptr<llvm::MemoryBuffer>> Buffers; // Owned memory buffers; NOTE(review): presumably back the definition text - confirm.
- llvm::StringMap<Definition> Definitions; // String-keyed definitions; NOTE(review): presumably keyed by macro name - confirm.
- };
- /// Converts a sequence of UnwrappedLines containing expanded macros into a
- /// single UnwrappedLine containing the macro calls. This UnwrappedLine may be
- /// broken into child lines, in a way that best conveys the structure of the
- /// expanded code.
- ///
- /// In the simplest case, a spelled UnwrappedLine contains one macro, and after
- /// expanding it we have one expanded UnwrappedLine. In general, macro
- /// expansions can span UnwrappedLines, and multiple macros can contribute
- /// tokens to the same line. We keep consuming expanded lines until:
- /// * all expansions that started have finished (we're not chopping any macros
- /// in half)
- /// * *and* we've reached the end of a *spelled* unwrapped line.
- ///
- /// A single UnwrappedLine represents this chunk of code.
- ///
- /// After this point, the state of the spelled/expanded stream is "in sync"
- /// (both at the start of an UnwrappedLine, with no macros open), so the
- /// MacroCallReconstructor can be thrown away and parsing can continue.
- ///
- /// Given a mapping from the macro name identifier token in the macro call
- /// to the tokens of the macro call, for example:
- /// CLASSA -> CLASSA({public: void x();})
- ///
- /// When getting the formatted lines of the expansion via the \c addLine method
- /// (each '->' specifies a call to \c addLine ):
- /// -> class A {
- /// -> public:
- /// -> void x();
- /// -> };
- ///
- /// Creates the tree of unwrapped lines containing the macro call tokens so that
- /// the macro call tokens fit the semantic structure of the expanded formatted
- /// lines:
- /// -> CLASSA({
- /// -> public:
- /// -> void x();
- /// -> })
- class MacroCallReconstructor {
- public:
- /// Create a reconstructor whose resulting \c UnwrappedLine will start at
- /// \p Level, using the map from name identifier token to the corresponding
- /// tokens of the spelled macro call.
- MacroCallReconstructor(
- unsigned Level,
- const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
- &ActiveExpansions); // NOTE(review): same type as the IdToReconstructed member, unrelated to the ActiveExpansions member stack; presumably bound to the former - confirm.
- /// For the given \p Line, match all occurrences of tokens expanded from a
- /// macro to unwrapped lines in the spelled macro call so that the resulting
- /// tree of unwrapped lines best resembles the structure of unwrapped lines
- /// passed in via \c addLine.
- void addLine(const UnwrappedLine &Line);
- /// Check whether at the current state there is no open macro expansion
- /// that needs to be processed to finish a macro call.
- /// Only when \c finished() is true, \c takeResult() can be called to retrieve
- /// the resulting \c UnwrappedLine.
- /// If there are multiple subsequent macro calls within an unwrapped line in
- /// the spelled token stream, the calling code may also continue to call
- /// \c addLine() when \c finished() is true.
- bool finished() const { return ActiveExpansions.empty(); }
- /// Retrieve the formatted \c UnwrappedLine containing the original
- /// macro calls, formatted according to the expanded token stream received
- /// via \c addLine().
- /// Generally, this line tries to have the same structure as the expanded,
- /// formatted unwrapped lines handed in via \c addLine(), with the exception
- /// that for multiple top-level lines, each subsequent line will be the
- /// child of the last token in its predecessor. This representation is chosen
- /// because it is a precondition to the formatter that we get what looks like
- /// a single statement in a single \c UnwrappedLine (i.e. matching parens).
- ///
- /// If a token in a macro argument is a child of a token in the expansion,
- /// the parent will be the corresponding token in the macro call.
- /// For example:
- ///   #define C(a, b) class C { a b
- ///   C(int x;, int y;)
- /// would expand to
- ///   class C { int x; int y;
- /// where in a formatted line "int x;" and "int y;" would both be new separate
- /// lines.
- ///
- /// In the result, "int x;" will be a child of the opening parenthesis in "C("
- /// and "int y;" will be a child of the "," token:
- ///   C (
- ///     \- int x;
- ///   ,
- ///     \- int y;
- ///   )
- UnwrappedLine takeResult() &&; // Rvalue-ref-qualified: callable only on an rvalue reconstructor.
- private:
- void add(FormatToken *Token, FormatToken *ExpandedParent, bool First);
- void prepareParent(FormatToken *ExpandedParent, bool First);
- FormatToken *getParentInResult(FormatToken *Parent);
- void reconstruct(FormatToken *Token);
- void startReconstruction(FormatToken *Token);
- bool reconstructActiveCallUntil(FormatToken *Token);
- void endReconstruction(FormatToken *Token);
- bool processNextReconstructed();
- void finalize();
- struct ReconstructedLine; // Forward declaration; the definition follows LineNode below.
- void appendToken(FormatToken *Token, ReconstructedLine *L = nullptr);
- UnwrappedLine createUnwrappedLine(const ReconstructedLine &Line, int Level);
- void debug(const ReconstructedLine &Line, int Level);
- ReconstructedLine &parentLine();
- ReconstructedLine *currentLine();
- void debugParentMap() const;
- #ifndef NDEBUG
- enum ReconstructorState {
- Start, // No macro expansion was found in the input yet.
- InProgress, // During a macro reconstruction.
- Finalized, // Past macro reconstruction, the result is finalized.
- };
- ReconstructorState State = Start; // Only present in builds without NDEBUG.
- #endif
- // Node in which we build up the resulting unwrapped line; this type is
- // analogous to UnwrappedLineNode.
- struct LineNode {
- LineNode() = default;
- LineNode(FormatToken *Tok) : Tok(Tok) {}
- FormatToken *Tok = nullptr; // Non-owning; null for a default-constructed node.
- llvm::SmallVector<std::unique_ptr<ReconstructedLine>> Children; // Owned child lines hanging off this token.
- };
- // Line in which we build up the resulting unwrapped line.
- // FIXME: Investigate changing UnwrappedLine to a pointer type and using it
- // instead of rolling our own type.
- struct ReconstructedLine {
- llvm::SmallVector<std::unique_ptr<LineNode>> Tokens; // Owned nodes of this line, in order.
- };
- // The line in which we collect the resulting reconstructed output.
- // To reduce special cases in the algorithm, the first level of the line
- // contains a single null token that has the reconstructed incoming
- // lines as children.
- // In the end, we stitch the lines together so that each subsequent line
- // is a child of the last token of the previous line. This is necessary
- // in order to format the overall expression as a single logical line -
- // if we created separate lines, we'd format them with their own top-level
- // indent depending on the semantic structure, which is not desired.
- ReconstructedLine Result;
- // Stack of currently "open" lines, where each line's predecessor's last
- // token is the parent token for that line.
- llvm::SmallVector<ReconstructedLine *> ActiveReconstructedLines;
- // Maps from the expanded token to the token that takes its place in the
- // reconstructed token stream in terms of parent-child relationships.
- // Note that it might take multiple steps to arrive at the correct
- // parent in the output.
- // Given: #define C(a, b) []() { a; b; }
- // And a call: C(f(), g())
- // The structure in the incoming formatted unwrapped line will be:
- //   []() {
- //   |- f();
- //   \- g();
- //   }
- // with f and g being children of the opening brace.
- // In the reconstructed call:
- //   C(f(), g())
- //    \- f()
- //        \- g()
- // We want f to be a child of the opening parenthesis and g to be a child
- // of the comma token in the macro call.
- // Thus, we map
- //   { -> (
- // and add
- //   ( -> ,
- // once we're past the comma in the reconstruction.
- llvm::DenseMap<FormatToken *, FormatToken *>
- SpelledParentToReconstructedParent;
- // Keeps track of a single expansion while we're reconstructing tokens it
- // generated.
- struct Expansion {
- // The identifier token of the macro call.
- FormatToken *ID;
- // Our current position in the reconstruction.
- std::list<UnwrappedLineNode>::iterator SpelledI;
- // The end of the reconstructed token sequence.
- std::list<UnwrappedLineNode>::iterator SpelledE;
- };
- // Stack of macro calls for which we're in the middle of an expansion.
- // finished() reports true exactly when this stack is empty.
- llvm::SmallVector<Expansion> ActiveExpansions;
- struct MacroCallState {
- MacroCallState(ReconstructedLine *Line, FormatToken *ParentLastToken,
- FormatToken *MacroCallLParen);
- ReconstructedLine *Line; // Non-owning pointer to a reconstructed line.
- // The last token in the parent line or expansion, or nullptr if the macro
- // expansion is on a top-level line.
- //
- // For example, in the macro call:
- //   auto f = []() { ID(1); };
- // The MacroCallState for ID will have '{' as ParentLastToken.
- //
- // In the macro call:
- //   ID(ID(void f()));
- // The MacroCallState of the outer ID will have nullptr as ParentLastToken,
- // while the MacroCallState for the inner ID will have the '(' of the outer
- // ID as ParentLastToken.
- //
- // In the macro call:
- //   ID2(a, ID(b));
- // The MacroCallState of ID will have ',' as ParentLastToken.
- FormatToken *ParentLastToken;
- // The l_paren of this MacroCallState's macro call.
- FormatToken *MacroCallLParen;
- };
- // Keeps track of the lines into which the opening brace/parenthesis &
- // argument separating commas for each level in the macro call go in order to
- // put the corresponding closing brace/parenthesis into the same line in the
- // output and keep track of which parents in the expanded token stream map to
- // which tokens in the reconstructed stream.
- // When an opening brace/parenthesis has children, we want the structure of
- // the output line to be:
- //   |- MACRO
- //   |- (
- //   |  \- <argument>
- //   |- ,
- //   |  \- <argument>
- //   \- )
- llvm::SmallVector<MacroCallState> MacroCallStructure;
- // Level the generated UnwrappedLine will be at.
- const unsigned Level;
- // Maps from identifier of the macro call to an unwrapped line containing
- // all tokens of the macro call. Held by const reference, not owned.
- const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
- &IdToReconstructed;
- };
- } // namespace format
- } // namespace clang
- #endif
|