123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401 |
- /** \file
- * Defines the class interface for an antlr3 INTSTREAM.
- *
- * Certain functionality (such as DFAs for instance) abstracts the stream of tokens
- * or characters into a stream of integers. Hence this structure should be included
- * in any stream that is able to provide the output as a stream of integers (which is
- * basically anything).
- *
- * There are no specific implementations of the methods in this interface in general. Though
- * for purposes of casting and so on, it may be necessary to implement a function with
- * the signature in this interface which abstracts the base implementation. In essence though
- * the base stream provides a pointer to this interface, within which it installs its
- * normal match() functions and so on. Interfaces such as DFA are then passed the pANTLR3_INT_STREAM
- * and can treat any input as an int stream.
- *
- * For instance, a lexer implements a pANTLR3_BASE_RECOGNIZER, within which there is a pANTLR3_INT_STREAM.
- * However, a pANTLR3_INPUT_STREAM also provides a pANTLR3_INT_STREAM, which it has constructed from
- * its normal interface when it was created. This is then pointed at by the pANTLR3_BASE_RECOGNIZER
- * when it is initialized with a pANTLR3_INPUT_STREAM.
- *
- * Similarly if a pANTLR3_BASE_RECOGNIZER is initialized with a pANTLR3_TOKEN_STREAM, then the
- * pANTLR3_INT_STREAM is taken from the pANTLR3_TOKEN_STREAM.
- *
- * If a pANTLR3_BASE_RECOGNIZER is initialized with a pANTLR3_TREENODE_STREAM, then guess where
- * the pANTLR3_INT_STREAM comes from?
- *
- * Note that because the context pointer points to the actual interface structure that is providing
- * the ANTLR3_INT_STREAM, it is defined as a (void *) in this interface. There is no direct implementation
- * of an ANTLR3_INT_STREAM (unless someone did not understand what I was doing here =;?P).
- */
- #ifndef _ANTLR3_INTSTREAM_HPP
- #define _ANTLR3_INTSTREAM_HPP
- // [The "BSD licence"]
- // Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB
- //
- // All rights reserved.
- //
- // Redistribution and use in source and binary forms, with or without
- // modification, are permitted provided that the following conditions
- // are met:
- // 1. Redistributions of source code must retain the above copyright
- // notice, this list of conditions and the following disclaimer.
- // 2. Redistributions in binary form must reproduce the above copyright
- // notice, this list of conditions and the following disclaimer in the
- // documentation and/or other materials provided with the distribution.
- // 3. The name of the author may not be used to endorse or promote products
- // derived from this software without specific prior written permission.
- //
- // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- namespace antlr3 {
/** Bit flags identifying the kind of input a stream provides.
 *
 *  Each type indicator occupies its own bit, so a custom stream that can be
 *  treated as one of these kinds may OR the matching value into its own type
 *  indicator. INPUT_MASK covers all three indicator bits.
 */
enum STREAM_TYPE
{
    /** Type indicator for a character stream
     * \remark if a custom stream is created but it can be treated as
     * a char stream, then you may OR in this value to your type indicator
     */
    CHARSTREAM = 0x0001,

    /** Type indicator for a Token stream
     * \remark if a custom stream is created but it can be treated as
     * a token stream, then you may OR in this value to your type indicator
     */
    TOKENSTREAM = 0x0002,

    /** Type indicator for a common tree node stream
     * \remark if a custom stream is created but it can be treated as
     * a common tree node stream, then you may OR in this value to your type indicator
     */
    COMMONTREENODE = 0x0004,

    /** Type mask for input stream so we can switch in the above types
     * \remark DO NOT USE 0x0000 as a stream type!
     */
    INPUT_MASK = 0x0007
};
// Empty tag types. The stream classes in this header take them wrapped in
// ClassForwarder<> as a trailing parameter to select between overloads of
// LA() and consume().

/// Tag: the byte-order handling is resolved at run time into one of the
/// three categories below.
class RESOLVE_ENDIAN_AT_RUNTIME {};

/// Tag: no endian-specific handling.
/// NOTE(review): presumably used when machine and input byte order match - confirm.
class BYTE_AGNOSTIC {};

/// Tag: the input is Little Endian and the machine is not.
class ANTLR_LITTLE_ENDIAN {};

/// Tag: the input is Big Endian and the machine is not.
class ANTLR_BIG_ENDIAN {};
- template<class ImplTraits, class SuperType>
- class IntStream : public ImplTraits::AllocPolicyType
- {
- public:
- typedef typename ImplTraits::StringType StringType;
-
- protected:
- /** Potentially useful in error reporting and so on, this string is
- * an identification of the input source. It may be NULL, so anything
- * attempting to access it needs to check this and substitute a sensible
- * default.
- */
- StringType m_streamName;
- /** Last marker position allocated
- */
- ANTLR_MARKER m_lastMarker;
-
- bool m_upper_case; //if set, values should be returbed in upper case
- /// Indicates whether we should implement endian-specific logic
- /// 0 - Undefined 1 - Default(machine and input are both same), 2 - Little Endian, 3 - Big Endian
- ANTLR_UINT8 m_endian_spec;
- public:
- IntStream();
-
- // Return a string that identifies the input source
- //
- StringType getSourceName();
- StringType& get_streamName();
- const StringType& get_streamName() const;
- ANTLR_MARKER get_lastMarker() const;
- SuperType* get_super();
- /**
- * Function that installs a version of LA that always
- * returns upper case. Only valid for character streams and creates a case
- * insensitive lexer if the lexer tokens are described in upper case. The
- * tokens will preserve case in the token text.
- */
- void setUcaseLA(bool flag);
- /** Consume the next 'ANTR3_UINT32' in the stream
- */
- void consume();
- /** Get ANTLR3_UINT32 at current input pointer + i ahead where i=1 is next ANTLR3_UINT32
- */
- ANTLR_UINT32 LA( ANTLR_INT32 i);
- /** Tell the stream to start buffering if it hasn't already. Return
- * current input position, index(), or some other marker so that
- * when passed to rewind() you get back to the same spot.
- * rewind(mark()) should not affect the input cursor.
- */
- ANTLR_MARKER mark();
-
- /** Return the current input symbol index 0..n where n indicates the
- * last symbol has been read.
- */
- ANTLR_MARKER index();
- /** Reset the stream so that next call to index would return marker.
- * The marker will usually be index() but it doesn't have to be. It's
- * just a marker to indicate what state the stream was in. This is
- * essentially calling release() and seek(). If there are markers
- * created after this marker argument, this routine must unroll them
- * like a stack. Assume the state the stream was in when this marker
- * was created.
- */
- void rewind(ANTLR_MARKER marker);
- /** Reset the stream to the last marker position, witouh destryoing the
- * last marker position.
- */
- void rewindLast();
- /** You may want to commit to a backtrack but don't want to force the
- * stream to keep bookkeeping objects around for a marker that is
- * no longer necessary. This will have the same behavior as
- * rewind() except it releases resources without the backward seek.
- */
- void release(ANTLR_MARKER mark);
- /** Set the input cursor to the position indicated by index. This is
- * normally used to seek ahead in the input stream. No buffering is
- * required to do this unless you know your stream will use seek to
- * move backwards such as when backtracking.
- *
- * This is different from rewind in its multi-directional
- * requirement and in that its argument is strictly an input cursor (index).
- *
- * For char streams, seeking forward must update the stream state such
- * as line number. For seeking backwards, you will be presumably
- * backtracking using the mark/rewind mechanism that restores state and
- * so this method does not need to update state when seeking backwards.
- *
- * Currently, this method is only used for efficient backtracking, but
- * in the future it may be used for incremental parsing.
- */
- void seek(ANTLR_MARKER index);
- /// Debug only method to flag consumption of initial off-channel
- /// tokens in the input stream
- ///
- void consumeInitialHiddenTokens();
- void rewindMark(ANTLR_MARKER marker);
- ANTLR_MARKER tindex();
- /** Frees any resources that were allocated for the implementation of this
- * interface. Usually this is just releasing the memory allocated
- * for the structure itself, but it may of course do anything it need to
- * so long as it does not stamp on anything else.
- */
- ~IntStream();
- protected:
- void setupIntStream(bool machineBigEndian, bool inputBigEndian);
- void findout_endian_spec(bool machineBigEndian, bool inputBigEndian);
- //If the user chooses this option, then we will be resolving stuffs at run-time
- ANTLR_UINT32 LA( ANTLR_INT32 i, ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> );
- //resolve into one of the three categories below at runtime
- void consume( ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> );
- };
- template<class ImplTraits, class SuperType>
- class EBCDIC_IntStream : public IntStream<ImplTraits, SuperType>
- {
- public:
- ANTLR_UINT32 LA( ANTLR_INT32 i);
- protected:
- void setupIntStream();
- };
- template<class ImplTraits, class SuperType>
- class UTF8_IntStream : public IntStream<ImplTraits, SuperType>
- {
- public:
- ANTLR_UINT32 LA( ANTLR_INT32 i);
- void consume();
- protected:
- void setupIntStream(bool machineBigEndian, bool inputBigEndian);
- private:
- static const ANTLR_UINT32* TrailingBytesForUTF8();
- static const UTF32* OffsetsFromUTF8();
- };
- template<class ImplTraits, class SuperType>
- class UTF16_IntStream : public IntStream<ImplTraits, SuperType>
- {
- public:
- ANTLR_UINT32 LA( ANTLR_INT32 i);
- void consume();
- ANTLR_MARKER index();
- void seek(ANTLR_MARKER seekPoint);
- protected:
- void setupIntStream(bool machineBigEndian, bool inputBigEndian);
- /// \brief Return the input element assuming an 8 bit ascii input
- ///
- /// \param[in] input Input stream context pointer
- /// \param[in] la 1 based offset of next input stream element
- ///
- /// \return Next input character in internal ANTLR3 encoding (UTF32)
- ///
- ANTLR_UINT32 LA( ANTLR_INT32 i, ClassForwarder<BYTE_AGNOSTIC> );
- /// \brief Return the input element assuming a UTF16 input when the input is Little Endian and the machine is not
- ///
- /// \param[in] input Input stream context pointer
- /// \param[in] la 1 based offset of next input stream element
- ///
- /// \return Next input character in internal ANTLR3 encoding (UTF32)
- ///
- ANTLR_UINT32 LA( ANTLR_INT32 i, ClassForwarder<ANTLR_LITTLE_ENDIAN> );
-
- /// \brief Return the input element assuming a UTF16 input when the input is Little Endian and the machine is not
- ///
- /// \param[in] input Input stream context pointer
- /// \param[in] la 1 based offset of next input stream element
- ///
- /// \return Next input character in internal ANTLR3 encoding (UTF32)
- ///
- ANTLR_UINT32 LA( ANTLR_INT32 i, ClassForwarder<ANTLR_BIG_ENDIAN> );
- /// \brief Consume the next character in a UTF16 input stream
- ///
- /// \param input Input stream context pointer
- ///
- void consume( ClassForwarder<BYTE_AGNOSTIC> );
- /// \brief Consume the next character in a UTF16 input stream when the input is Little Endian and the machine is not
- /// Note that the UTF16 routines do not do any substantial verification of the input stream as for performance
- /// sake, we assume it is validly encoded. So if a low surrogate is found at the curent input position then we
- /// just consume it. Surrogate pairs should be seen as Hi, Lo. So if we have a Lo first, then the input stream
- /// is fubar but we just ignore that.
- ///
- /// \param input Input stream context pointer
- ///
- void consume( ClassForwarder<ANTLR_LITTLE_ENDIAN> );
- /// \brief Consume the next character in a UTF16 input stream when the input is Big Endian and the machine is not
- ///
- /// \param input Input stream context pointer
- ///
- void consume( ClassForwarder<ANTLR_BIG_ENDIAN> );
- };
- template<class ImplTraits, class SuperType>
- class UTF32_IntStream : public IntStream<ImplTraits, SuperType>
- {
- public:
- ANTLR_UINT32 LA( ANTLR_INT32 i);
- void consume();
-
- /// \brief Calculate the current index in the output stream.
- /// \param[in] input Input stream context pointer
- ///
- ANTLR_MARKER index();
- void seek(ANTLR_MARKER seekPoint);
- protected:
- void setupIntStream(bool machineBigEndian, bool inputBigEndian);
- ANTLR_UINT32 LA( ANTLR_INT32 i, ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> );
- ANTLR_UINT32 LA( ANTLR_INT32 i, ClassForwarder<BYTE_AGNOSTIC> );
- ANTLR_UINT32 LA( ANTLR_INT32 i, ClassForwarder<ANTLR_LITTLE_ENDIAN> );
- ANTLR_UINT32 LA( ANTLR_INT32 i, ClassForwarder<ANTLR_BIG_ENDIAN> );
- void consume( ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> );
- void consume( ClassForwarder<BYTE_AGNOSTIC> );
- void consume( ClassForwarder<ANTLR_LITTLE_ENDIAN> );
- void consume( ClassForwarder<ANTLR_BIG_ENDIAN> );
- };
- template<class ImplTraits>
- class TokenIntStream : public IntStream<ImplTraits, typename ImplTraits::TokenStreamType >
- {
- public:
- typedef typename ImplTraits::CommonTokenType CommonTokenType;
- typedef typename ImplTraits::StringType StringType;
- typedef typename ImplTraits::TokenStreamType TokenStreamType;
- typedef IntStream<ImplTraits, TokenStreamType > BaseType;
- private:
- /** Because the indirect call, though small in individual cases can
- * mount up if there are thousands of tokens (very large input streams), callers
- * of size can optionally use this cached size field.
- */
- ANTLR_UINT32 m_cachedSize;
- public:
- TokenIntStream();
- ANTLR_UINT32 get_cachedSize() const;
- void set_cachedSize( ANTLR_UINT32 cachedSize );
- void consume();
- void consumeInitialHiddenTokens();
- ANTLR_UINT32 LA( ANTLR_INT32 i );
- ANTLR_MARKER mark();
- ANTLR_UINT32 size();
- void release();
- ANTLR_MARKER tindex();
- void rewindLast();
- void rewind(ANTLR_MARKER marker);
- void seek(ANTLR_MARKER index);
- StringType getSourceName();
- };
- template<class ImplTraits>
- class TreeNodeIntStream : public IntStream<ImplTraits, typename ImplTraits::TreeNodeStreamType>
- {
- public:
- typedef typename ImplTraits::TreeNodeStreamType TreeNodeStreamType;
- typedef IntStream<ImplTraits, TreeNodeStreamType > BaseType;
- typedef typename ImplTraits::TreeType TreeType;
- typedef typename ImplTraits::TreeTypePtr TreeTypePtr;
- typedef typename ImplTraits::CommonTokenType CommonTokenType;
- public:
- void consume();
- ANTLR_MARKER tindex();
- ANTLR_UINT32 LA(ANTLR_INT32 i);
- ANTLR_MARKER mark();
- void release(ANTLR_MARKER marker);
- void rewindMark(ANTLR_MARKER marker);
- void rewindLast();
- void seek(ANTLR_MARKER index);
- ANTLR_UINT32 size();
- };
- }
- #include "antlr3intstream.inl"
- #endif
|