//===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This class represents the Lexer for tablegen files.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TABLEGEN_TGLEXER_H
#define LLVM_LIB_TABLEGEN_TGLEXER_H

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/SMLoc.h"
#include <cassert>
#include <memory>
#include <set>
#include <string>
#include <vector>

namespace llvm {
template <typename T> class ArrayRef;
class SourceMgr;
class Twine;
namespace tgtok {
enum TokKind {
  // Markers
  Eof, Error,

  // Tokens with no info.
  minus, plus,        // - +
  l_square, r_square, // [ ]
  l_brace, r_brace,   // { }
  l_paren, r_paren,   // ( )
  less, greater,      // < >
  colon, semi,        // : ;
  comma, dot,         // , .
  equal, question,    // = ?
  paste,              // #
  dotdotdot,          // ...

  // Reserved keywords. ('ElseKW' is named to distinguish it from the
  // existing 'Else' that means the preprocessor #else.)
  Assert, Bit, Bits, Class, Code, Dag, Def, Defm, Defset, Defvar, ElseKW,
  FalseKW, Field, Foreach, If, In, Include, Int, Let, List, MultiClass,
  String, Then, TrueKW,

  // Bang operators.
  XConcat, XADD, XSUB, XMUL, XDIV, XNOT, XLOG2, XAND, XOR, XXOR, XSRA, XSRL,
  XSHL, XListConcat, XListSplat, XStrConcat, XInterleave, XSubstr, XFind,
  XCast, XSubst, XForEach, XFilter, XFoldl, XHead, XTail, XSize, XEmpty, XIf,
  XCond, XEq, XIsA, XDag, XNe, XLe, XLt, XGe, XGt, XSetDagOp, XGetDagOp,
  XExists, XListRemove,

  // Boolean literals.
  TrueVal, FalseVal,

  // Integer value.
  IntVal,

  // Binary constant. Note that these are sized according to the number of
  // bits given.
  BinaryIntVal,

  // String valued tokens.
  Id, StrVal, VarName, CodeFragment,

  // Preprocessing tokens for internal usage by the lexer.
  // They are never returned as a result of Lex().
  Ifdef, Ifndef, Else, Endif, Define
};
} // end namespace tgtok
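
// For illustration only (this example is not part of the original header),
// the TableGen line
//     def Foo : Bar<42>;
// is lexed as the token sequence:
//     Def, Id("Foo"), colon, Id("Bar"), less, IntVal(42), greater, semi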

/// TGLexer - TableGen Lexer class.
class TGLexer {
  SourceMgr &SrcMgr;

  const char *CurPtr = nullptr;
  StringRef CurBuf;

  // Information about the current token.
  const char *TokStart = nullptr;
  tgtok::TokKind CurCode = tgtok::TokKind::Eof;
  std::string CurStrVal; // This is valid for Id, StrVal, VarName, CodeFragment
  int64_t CurIntVal = 0; // This is valid for IntVal.

  /// CurBuffer - This is the current buffer index we're lexing from as managed
  /// by the SourceMgr object.
  unsigned CurBuffer = 0;

public:
  typedef std::set<std::string> DependenciesSetTy;

private:
  /// Dependencies - This is the list of all included files.
  DependenciesSetTy Dependencies;

public:
  TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros);

  tgtok::TokKind Lex() {
    return CurCode = LexToken(CurPtr == CurBuf.begin());
  }

  const DependenciesSetTy &getDependencies() const {
    return Dependencies;
  }

  tgtok::TokKind getCode() const { return CurCode; }

  const std::string &getCurStrVal() const {
    assert((CurCode == tgtok::Id || CurCode == tgtok::StrVal ||
            CurCode == tgtok::VarName || CurCode == tgtok::CodeFragment) &&
           "This token doesn't have a string value");
    return CurStrVal;
  }
  int64_t getCurIntVal() const {
    assert(CurCode == tgtok::IntVal && "This token isn't an integer");
    return CurIntVal;
  }
  std::pair<int64_t, unsigned> getCurBinaryIntVal() const {
    assert(CurCode == tgtok::BinaryIntVal &&
           "This token isn't a binary integer");
    return std::make_pair(CurIntVal, (CurPtr - TokStart) - 2);
  }

  SMLoc getLoc() const;
  SMRange getLocRange() const;
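
  // A minimal driver sketch (illustrative only; the real consumer is
  // TGParser, and the SourceMgr setup is assumed rather than shown):
  //
  //     TGLexer Lexer(SrcMgr, /*Macros=*/{});
  //     tgtok::TokKind Tok = Lexer.Lex();
  //     while (Tok != tgtok::Eof && Tok != tgtok::Error) {
  //       if (Tok == tgtok::Id)
  //         HandleIdentifier(Lexer.getCurStrVal()); // hypothetical helper
  //       Tok = Lexer.Lex();
  //     }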

private:
  /// LexToken - Read the next token and return its code.
  tgtok::TokKind LexToken(bool FileOrLineStart = false);

  tgtok::TokKind ReturnError(SMLoc Loc, const Twine &Msg);
  tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);

  int getNextChar();
  int peekNextChar(int Index) const;
  void SkipBCPLComment();
  bool SkipCComment();
  tgtok::TokKind LexIdentifier();
  bool LexInclude();
  tgtok::TokKind LexString();
  tgtok::TokKind LexVarName();
  tgtok::TokKind LexNumber();
  tgtok::TokKind LexBracket();
  tgtok::TokKind LexExclaim();

  // Process EOF encountered in LexToken().
  // If EOF is met in an include file, then the method will update
  // CurPtr, CurBuf and the preprocessing include stack, and return true.
  // If EOF is met in the top-level file, then the method will
  // update and check the preprocessing include stack, and return false.
  bool processEOF();

  // *** Structures and methods for preprocessing support ***

  // A set of macro names that are defined either via the command line or
  // by using:
  //     #define NAME
  StringSet<> DefinedMacros;

  // Each #ifdef and #else directive has a descriptor associated with it.
  //
  // The ordered list of preprocessing controls currently in effect, defined
  // by #ifdef/#else directives, is called the preprocessing control stack.
  // It is represented as a vector of PreprocessorControlDesc's.
  //
  // The control stack is updated according to the following rules:
  //
  //   For each #ifdef we push an element onto the control stack.
  //   For each #else we replace the top element with a descriptor
  //   that has an inverted IsDefined value.
  //   For each #endif we pop the top element from the control stack.
  //
  // When CurPtr reaches the current buffer's end, the control stack
  // must be empty, i.e. an #ifdef and its corresponding #endif
  // must be located in the same file.
  struct PreprocessorControlDesc {
    // Either tgtok::Ifdef or tgtok::Else.
    tgtok::TokKind Kind;

    // True if the condition for this directive holds, false otherwise.
    // Examples:
    //     #ifdef NAME : true if NAME is defined, false otherwise.
    //     ...
    //     #else       : false if NAME is defined, true otherwise.
    bool IsDefined;

    // Pointer into CurBuf to the beginning of the preprocessing directive
    // word, e.g.:
    //     #ifdef NAME
    //     ^ - SrcPos
    SMLoc SrcPos;
  };
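
  // For illustration only (this worked example is not part of the original
  // header), the control stack for the following input, with A defined and
  // B undefined, evolves as shown on the right:
  //
  //     #ifdef A   ->  [ {Ifdef, true} ]
  //     #ifdef B   ->  [ {Ifdef, true}, {Ifdef, false} ]
  //     #else      ->  [ {Ifdef, true}, {Else,  true} ]
  //     #endif     ->  [ {Ifdef, true} ]
  //     #endif     ->  [ ]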

  // We want to disallow code like this:
  //     file1.td:
  //         #define NAME
  //         #ifdef NAME
  //         include "file2.td"
  //     EOF
  //     file2.td:
  //         #endif
  //     EOF
  //
  // To do this, we clear the preprocessing control stack on entry
  // to each included file. PrepIncludeStack is used to store the
  // preprocessing control stacks for the current file and all its
  // parent files. The back() element is the preprocessing control
  // stack for the current file.
  std::vector<std::unique_ptr<std::vector<PreprocessorControlDesc>>>
      PrepIncludeStack;
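
  // For example (illustrative only), while lexing file2.td included from
  // file1.td, PrepIncludeStack holds two control stacks:
  //     PrepIncludeStack[0] - control stack for file1.td
  //     PrepIncludeStack[1] - control stack for file2.td (the back() element)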

  // Validate that the current preprocessing control stack is empty,
  // since we are about to exit a file, and pop the include stack.
  //
  // If IncludeStackMustBeEmpty is true, the include stack must be empty
  // after the popping; otherwise, the include stack must not be empty
  // after the popping. Basically, the include stack may only be empty
  // if we are exiting the "top-level" file (i.e. finishing lexing).
  //
  // The method returns false if the current preprocessing control stack
  // is not empty (e.g. there is an unterminated #ifdef/#else),
  // and true otherwise.
  bool prepExitInclude(bool IncludeStackMustBeEmpty);

  // Look ahead for a preprocessing directive starting from CurPtr. The caller
  // must only call this method if *(CurPtr - 1) is '#'. If the method matches
  // a preprocessing directive word followed by whitespace, then it returns
  // one of the internal token kinds, i.e. Ifdef, Else, Endif or Define.
  //
  // CurPtr is not adjusted by this method.
  tgtok::TokKind prepIsDirective() const;

  // Given a preprocessing token kind, adjusts CurPtr to the end
  // of the preprocessing directive word. Returns true unless
  // an unsupported token kind is passed in.
  //
  // We use the look-ahead prepIsDirective() and prepEatPreprocessorDirective()
  // to avoid adjusting CurPtr before we are sure that '#' is followed
  // by a preprocessing directive. If it is not, then we fall back to
  // the tgtok::paste interpretation of '#'.
  bool prepEatPreprocessorDirective(tgtok::TokKind Kind);
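
  // For illustration only (not part of the original header):
  //     #ifdef NAME   - prepIsDirective() recognizes the 'ifdef' word, so
  //                     lexing is handed over to the preprocessor.
  //     a # b         - no directive word follows '#', so the lexer falls
  //                     back to returning tgtok::paste.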

  // The main "exit" point from token parsing to the preprocessor.
  //
  // The method is called for CurPtr when prepIsDirective() detects a
  // preprocessing directive. The first parameter matches the result of
  // prepIsDirective(), denoting the actual preprocessor directive to be
  // processed.
  //
  // If the preprocessing directive disables token processing, e.g.:
  //     #ifdef NAME // NAME is undefined
  // then lexPreprocessor() enters the lines-skipping mode.
  // In this mode, it does not parse any tokens, because the code under
  // the #ifdef may not even be correct TableGen code. The preprocessor
  // looks for lines containing other preprocessing directives, which
  // may be preceded by whitespace and C-style comments. If a line
  // does not contain a preprocessing directive, it is skipped completely.
  // Otherwise, the preprocessing directive is processed by recursively
  // calling lexPreprocessor(). The processing of the encountered
  // preprocessing directives includes updating the preprocessing control
  // stack and adding new macros to the DefinedMacros set.
  //
  // The second parameter controls whether lexPreprocessor() is called from
  // LexToken() (true) or recursively from lexPreprocessor() (false).
  //
  // If ReturnNextLiveToken is true, the method returns the next live
  // token following the current directive or following the end
  // of the disabled preprocessing region corresponding to this directive.
  // If ReturnNextLiveToken is false, the method returns the first parameter,
  // unless errors were encountered in the disabled preprocessing
  // region - in this case, it returns tgtok::Error.
  tgtok::TokKind lexPreprocessor(tgtok::TokKind Kind,
                                 bool ReturnNextLiveToken = true);
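
  // For illustration only (not part of the original header), with NAME
  // undefined and ReturnNextLiveToken set to true, given the input:
  //     #ifdef NAME
  //     this text is skipped without being tokenized
  //     #endif
  //     def Foo;
  // lexPreprocessor(tgtok::Ifdef) skips the disabled region and returns
  // the token for 'def'.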

  // Worker method for lexPreprocessor() to skip lines after some
  // preprocessing directive up to the buffer end or to the directive
  // that re-enables token processing. The method returns true
  // upon processing the next directive that re-enables token
  // processing. False is returned if an error was encountered.
  //
  // Note that prepSkipRegion() calls lexPreprocessor() to process
  // encountered preprocessing directives. In this case, the second
  // parameter to lexPreprocessor() is set to false. Being passed a
  // false ReturnNextLiveToken, lexPreprocessor() must never call
  // prepSkipRegion(). We assert this by passing ReturnNextLiveToken
  // to prepSkipRegion() and checking that it is never set to false.
  bool prepSkipRegion(bool MustNeverBeFalse);

  // Lex the name of the macro after either #ifdef or #define. We could have
  // used LexIdentifier(), but it has special handling of the "include" word,
  // which could result in awkward diagnostic errors. Consider:
  // ----
  // #ifdef include
  // class ...
  // ----
  // LexIdentifier() would engage LexInclude(), which would complain about a
  // missing file named "class". Instead, prepLexMacroName() treats
  // "include" as a normal macro name.
  //
  // On entry, CurPtr points to the end of a preprocessing directive word.
  // The method allows whitespace between the preprocessing directive
  // and the macro name. The allowed whitespace characters are ' ' and '\t'.
  //
  // If the first non-whitespace symbol after the preprocessing directive
  // is a valid start symbol for an identifier (i.e. [a-zA-Z_]), then
  // the method updates TokStart to the position of the first non-whitespace
  // symbol, sets CurPtr to the position of the macro name's last symbol,
  // and returns a string reference to the macro name. Otherwise,
  // TokStart is set to the first non-whitespace symbol after the preprocessing
  // directive, and the method returns an empty string reference.
  //
  // In all cases, TokStart may be used to point to the word following
  // the preprocessing directive.
  StringRef prepLexMacroName();

  // Skip any whitespace starting from CurPtr. The method is used
  // only in the lines-skipping mode to find the first non-whitespace
  // symbol at or after CurPtr. The allowed whitespace characters are ' ',
  // '\t', '\n' and '\r'. The method skips C-style comments as well, because
  // it is used to find the beginning of the preprocessing directive.
  // If we did not handle C-style comments, the following code would
  // result in incorrect detection of a preprocessing directive:
  //     /*
  //     #ifdef NAME
  //     */
  // As long as we skip C-style comments, the following code is correctly
  // recognized as a preprocessing directive:
  //     /* first line comment
  //        second line comment */ #ifdef NAME
  //
  // The method returns true upon reaching the first non-whitespace symbol
  // or EOF; CurPtr is set to point to this symbol. The method returns false
  // if an error occurred while skipping a C-style comment.
  bool prepSkipLineBegin();

  // Skip any whitespace or comments after a preprocessing directive.
  // The method returns true upon reaching either the end of the line
  // or the end of the file. If there is a multiline C-style comment
  // after the preprocessing directive, the method skips
  // the comment, so the final CurPtr may point to one of the next lines.
  // The method returns false if an error occurred while skipping
  // a C- or C++-style comment, or if a non-whitespace symbol appears
  // after the preprocessing directive.
  //
  // The method may be called both during lines-skipping and token
  // processing. It effectively verifies that only whitespace and/or
  // comments follow a preprocessing directive.
  //
  // After the execution of this method, CurPtr points to either a newline
  // symbol, the buffer end, or a non-whitespace symbol following the
  // preprocessing directive.
  bool prepSkipDirectiveEnd();

  // Skip all symbols to the end of the line/file.
  // The method adjusts CurPtr so that it points to either the newline
  // symbol in the current line or the buffer end.
  void prepSkipToLineEnd();

  // Return true if the current preprocessor control stack is such that
  // we should allow the lexer to process the next token, false otherwise.
  //
  // In particular, the method returns true if all the #ifdef/#else
  // controls on the stack have their IsDefined member set to true.
  bool prepIsProcessingEnabled();

  // Report an error if we reach EOF with a non-empty preprocessing control
  // stack. This means there is no matching #endif for the previous
  // #ifdef/#else.
  void prepReportPreprocessorStackError();
};

} // end namespace llvm

#endif