// Origin: SMusatov/ydb -- mirror of https://github.com/ydb-platform/ydb.git
							namespace antlr3 {

// Constructs an input stream from a file.
//
// The file is first loaded as plain 8 bit bytes (createFileStream), then the
// caller supplied encoding is recorded and setupInputStream() adjusts the
// stream for that encoding. Finally the file name is stored as both the
// stream name and the file name.
//
// fileName : name of the file to read; createFileStream() throws
//            ParseFileAbsentException when it is NULL.
// encoding : encoding value later switched on by setupInputStream().
//
template<class ImplTraits>
InputStream<ImplTraits>::InputStream(const ANTLR_UINT8* fileName, ANTLR_UINT32 encoding)
{
    // Step 1: get the raw bytes into memory and apply the generic 8 bit setup.
    //
    this->createFileStream(fileName);

    // Step 2: record the encoding the user asked for, then let
    // setupInputStream() act on it (it reads m_encoding).
    //
    this->m_encoding = encoding;
    this->setupInputStream();

    // Step 3: the stream is named after the file it was read from.
    //
    const char* const streamName = reinterpret_cast<const char*>(fileName);
    BaseType::m_streamName = streamName;
    this->m_fileName = BaseType::m_streamName;
}

// Constructs an input stream over a caller supplied memory buffer.
//
// data     : start of the buffer; createStringStream() throws
//            ParseNullStringException when it is NULL. The buffer is not
//            marked as allocated by the stream (m_isAllocated is false).
// encoding : encoding value later switched on by setupInputStream().
// size     : size (in bytes) of the given buffer.
// name     : optional stream name; an empty name is used when NULL.
//
template<class ImplTraits>
InputStream<ImplTraits>::InputStream(const ANTLR_UINT8* data, ANTLR_UINT32 encoding, ANTLR_UINT32 size, ANTLR_UINT8* name)
{
    // Install the data pointer and apply the generic 8 bit setup.
    //
    this->createStringStream(data);

    // Record the buffer size and the requested encoding before the
    // encoding specific setup runs.
    //
    this->m_sizeBuf  = size;
    this->m_encoding = encoding;
    this->setupInputStream();

    // Name the stream; fall back to the empty string when no name was given.
    //
    if (name != NULL)
    {
        BaseType::m_streamName = reinterpret_cast<const char*>(name);
    }
    else
    {
        BaseType::m_streamName = "";
    }
    this->m_fileName = BaseType::m_streamName;
}

// Points the stream at a caller owned buffer and performs the generic
// 8 bit initialization.
//
// Throws ParseNullStringException when data is NULL.
//
template<class ImplTraits>
void InputStream<ImplTraits>::createStringStream(const ANTLR_UINT8* data)
{
    // A NULL buffer cannot be parsed.
    //
    if (NULL == data)
    {
        throw ParseNullStringException();
    }

    // The buffer belongs to the caller, so the destructor must not free it.
    //
    this->m_isAllocated = false;
    this->m_data        = data;

    // Common 8 bit input stream initialization.
    //
    this->genericSetupStream();
}

// Loads the named file through FileUtils<ImplTraits>::AntlrRead8Bit and
// performs the generic 8 bit initialization.
//
// Throws ParseFileAbsentException when fileName is NULL.
//
template<class ImplTraits>
void InputStream<ImplTraits>::createFileStream(const ANTLR_UINT8* fileName)
{
    // Without a file name there is nothing to read.
    //
    if (NULL == fileName)
    {
        throw ParseFileAbsentException();
    }

    // Read the file contents as 8 bit bytes into this stream.
    //
    FileUtils<ImplTraits>::AntlrRead8Bit(this, fileName);

    // Common 8 bit input stream initialization.
    //
    this->genericSetupStream();
}

// Common initialization shared by the file and string stream creators:
// one byte per character, stream state reset to the start of the data,
// and '\n' installed as the line separator.
//
template<class ImplTraits>
void InputStream<ImplTraits>::genericSetupStream()
{
    // Default to single byte characters; setupInputStream() may change this.
    //
    this->set_charByteSize(1);

    // Start the stream state from scratch.
    //
    this->reset();

    // Default line separator (the grammar programmer can replace it later).
    //
    this->set_newLineChar( static_cast<ANTLR_UCHAR>('\n') );
}

// Releases the input buffer, but only when the stream is flagged as having
// allocated it and the buffer pointer is not NULL.
//
template<class ImplTraits>
InputStream<ImplTraits>::~InputStream()
{
    const bool ownsBuffer = m_isAllocated && (m_data != NULL);
    if (!ownsBuffer)
    {
        return;
    }

    // m_data is held as a pointer to const, so constness has to be dropped
    // before handing the buffer back to the allocation policy.
    //
    AllocPolicyType::free( const_cast<void*>( static_cast<const void*>(m_data) ) );
}

// Returns the pointer to the start of the input buffer (m_data).
//
template<class ImplTraits>
ANTLR_INLINE const typename InputStream<ImplTraits>::DataType*
InputStream<ImplTraits>::get_data() const
{
    return this->m_data;
}
// Returns the flag the destructor consults to decide whether to free m_data.
//
template<class ImplTraits>
ANTLR_INLINE bool
InputStream<ImplTraits>::get_isAllocated() const
{
    return this->m_isAllocated;
}
// Returns the current read pointer (m_nextChar); reset() places it at the
// first character of the buffer.
//
template<class ImplTraits>
ANTLR_INLINE const typename InputStream<ImplTraits>::DataType*
InputStream<ImplTraits>::get_nextChar() const
{
    return this->m_nextChar;
}
// Returns the recorded buffer size (m_sizeBuf).
//
template<class ImplTraits>
ANTLR_INLINE ANTLR_UINT32
InputStream<ImplTraits>::get_sizeBuf() const
{
    return this->m_sizeBuf;
}
// Returns the current line number (m_line); reset() starts it at 1.
//
template<class ImplTraits>
ANTLR_INLINE ANTLR_UINT32
InputStream<ImplTraits>::get_line() const
{
    return this->m_line;
}
// Returns the current-line pointer (m_currentLine); reset() points it at
// the start of the buffer.
//
template<class ImplTraits>
ANTLR_INLINE const typename InputStream<ImplTraits>::DataType*
InputStream<ImplTraits>::get_currentLine() const
{
    return this->m_currentLine;
}
// Returns the character position within the current line
// (m_charPositionInLine); reset() starts it at 0.
//
template<class ImplTraits>
ANTLR_INLINE ANTLR_INT32
InputStream<ImplTraits>::get_charPositionInLine() const
{
    return this->m_charPositionInLine;
}
// Returns the current mark depth (m_markDepth); reset() sets it to 0.
//
template<class ImplTraits>
ANTLR_INLINE ANTLR_UINT32
InputStream<ImplTraits>::get_markDepth() const
{
    return this->m_markDepth;
}
// Returns a mutable reference to the markers table (m_markers), which
// reset() clears.
//
template<class ImplTraits>
ANTLR_INLINE typename InputStream<ImplTraits>::MarkersType&
InputStream<ImplTraits>::get_markers()
{
    return this->m_markers;
}
// Returns the stored file name (m_fileName); the constructors initialise it
// from the stream name.
//
template<class ImplTraits>
ANTLR_INLINE const typename InputStream<ImplTraits>::StringType&
InputStream<ImplTraits>::get_fileName() const
{
    return this->m_fileName;
}
// Returns the stored file number (m_fileNo), as last given to set_fileNo().
//
template<class ImplTraits>
ANTLR_INLINE ANTLR_UINT32
InputStream<ImplTraits>::get_fileNo() const
{
    return this->m_fileNo;
}
// Returns the line separator character (m_newlineChar); genericSetupStream()
// installs '\n' by default.
//
template<class ImplTraits>
ANTLR_INLINE ANTLR_UCHAR
InputStream<ImplTraits>::get_newlineChar() const
{
    return this->m_newlineChar;
}
// Returns the character byte size (m_charByteSize). setupInputStream() sets
// it to 1, 2 or 4 depending on the encoding, and to 0 for UTF-8.
//
template<class ImplTraits>
ANTLR_INLINE ANTLR_UINT8
InputStream<ImplTraits>::get_charByteSize() const
{
    return this->m_charByteSize;
}
// Returns the encoding value (m_encoding) recorded for this stream.
//
template<class ImplTraits>
ANTLR_INLINE ANTLR_UINT32
InputStream<ImplTraits>::get_encoding() const
{
    return this->m_encoding;
}
// Replaces the input buffer pointer (m_data). No other stream state is
// touched; in particular the read position is not moved.
//
template<class ImplTraits>
ANTLR_INLINE void
InputStream<ImplTraits>::set_data( DataType* data )
{
    this->m_data = data;
}
// Sets the flag the destructor consults to decide whether to free m_data.
//
template<class ImplTraits>
ANTLR_INLINE void
InputStream<ImplTraits>::set_isAllocated( bool isAllocated )
{
    this->m_isAllocated = isAllocated;
}
// Moves the read pointer (m_nextChar) to the given position.
//
template<class ImplTraits>
ANTLR_INLINE void
InputStream<ImplTraits>::set_nextChar( const DataType* nextChar )
{
    this->m_nextChar = nextChar;
}
// Records the buffer size (m_sizeBuf).
//
template<class ImplTraits>
ANTLR_INLINE void
InputStream<ImplTraits>::set_sizeBuf( ANTLR_UINT32 sizeBuf )
{
    this->m_sizeBuf = sizeBuf;
}
// Overwrites the current line number (m_line).
//
template<class ImplTraits>
ANTLR_INLINE void
InputStream<ImplTraits>::set_line( ANTLR_UINT32 line )
{
    this->m_line = line;
}
// Overwrites the current-line pointer (m_currentLine).
//
template<class ImplTraits>
ANTLR_INLINE void
InputStream<ImplTraits>::set_currentLine( const DataType* currentLine )
{
    this->m_currentLine = currentLine;
}
// Overwrites the character position within the current line
// (signed overload).
//
template<class ImplTraits>
ANTLR_INLINE void
InputStream<ImplTraits>::set_charPositionInLine( ANTLR_INT32 charPositionInLine )
{
    this->m_charPositionInLine = charPositionInLine;
}
// Overwrites the mark depth (m_markDepth).
//
template<class ImplTraits>
ANTLR_INLINE void
InputStream<ImplTraits>::set_markDepth( ANTLR_UINT32 markDepth )
{
    this->m_markDepth = markDepth;
}
// Assigns the given markers table to m_markers.
//
template<class ImplTraits>
ANTLR_INLINE void
InputStream<ImplTraits>::set_markers( const MarkersType& markers )
{
    this->m_markers = markers;
}
// Assigns the given string to m_fileName. The stream name
// (BaseType::m_streamName) is not changed by this setter.
//
template<class ImplTraits>
ANTLR_INLINE void
InputStream<ImplTraits>::set_fileName( const StringType& fileName )
{
    this->m_fileName = fileName;
}
// Overwrites the stored file number (m_fileNo).
//
template<class ImplTraits>
ANTLR_INLINE void
InputStream<ImplTraits>::set_fileNo( ANTLR_UINT32 fileNo )
{
    this->m_fileNo = fileNo;
}
// Overwrites the line separator character (m_newlineChar).
//
template<class ImplTraits>
ANTLR_INLINE void
InputStream<ImplTraits>::set_newlineChar( ANTLR_UCHAR newlineChar )
{
    this->m_newlineChar = newlineChar;
}
// Overwrites the character byte size (m_charByteSize).
//
template<class ImplTraits>
ANTLR_INLINE void
InputStream<ImplTraits>::set_charByteSize( ANTLR_UINT8 charByteSize )
{
    this->m_charByteSize = charByteSize;
}
// Overwrites the recorded encoding (m_encoding). Only the value is stored;
// setupInputStream() is not re-run by this setter.
//
template<class ImplTraits>
ANTLR_INLINE void
InputStream<ImplTraits>::set_encoding( ANTLR_UINT32 encoding )
{
    this->m_encoding = encoding;
}

// Advances the character position within the current line by one.
//
template<class ImplTraits>
ANTLR_INLINE void
InputStream<ImplTraits>::inc_charPositionInLine()
{
    this->m_charPositionInLine += 1;
}

// Advances the line number by one.
//
template<class ImplTraits>
ANTLR_INLINE void
InputStream<ImplTraits>::inc_line()
{
    this->m_line += 1;
}

// Increases the mark depth by one.
//
template<class ImplTraits>
ANTLR_INLINE void
InputStream<ImplTraits>::inc_markDepth()
{
    this->m_markDepth += 1;
}

// Rewinds the stream to the beginning of its buffer: the read pointer and
// the current-line pointer go back to m_data, the line counter restarts at
// 1, the column at 0, and all mark state is discarded.
//
template<class ImplTraits>
ANTLR_INLINE void	InputStream<ImplTraits>::reset()
{
    // Both position pointers return to the first character.
    //
    this->m_nextChar    = this->m_data;
    this->m_currentLine = this->m_data;

    // Line numbering starts at 1, column numbering at 0.
    //
    this->m_line               = 1;
    this->m_charPositionInLine = 0;

    // Forget every outstanding mark.
    //
    this->m_markDepth = 0;
    this->m_markers.clear();
}

// Re-targets an existing stream at a new caller owned buffer.
//
// inString : start of the new buffer; the stream does not take ownership
//            (m_isAllocated is cleared).
// size     : size of the new buffer, stored in m_sizeBuf.
// name     : optional stream name; "-memory-" is used when NULL.
//
// The stream name and the file name are both refreshed. m_fileName is a
// separate string rather than an alias of the stream name, so updating only
// the stream name (as was previously done when a name was already set) left
// get_fileName() reporting the name of the previous input.
//
// NOTE(review): a buffer previously flagged as allocated is not freed here,
// the flag is simply cleared. Confirm whether callers rely on the old buffer
// staying alive before changing that.
//
// The encoding specific setup (setupInputStream) is not re-run; only reset()
// is called.
//
template<class ImplTraits>
void    InputStream<ImplTraits>::reuse(ANTLR_UINT8* inString, ANTLR_UINT32 size, ANTLR_UINT8* name)
{
    m_isAllocated = false;
    m_data        = inString;
    m_sizeBuf     = size;

    // Name the stream and keep the file name in step with it.
    //
    BaseType::m_streamName = ((name == NULL) ? "-memory-" : (const char *)name);
    m_fileName             = BaseType::m_streamName;

    this->reset();
}

/*
template<class ImplTraits>
typename InputStream<ImplTraits>::DataType*	InputStream<ImplTraits>::LT(ANTLR_INT32 lt)
{
	return this->LA(lt);
}
*/

// Returns the recorded buffer size (m_sizeBuf).
//
template<class ImplTraits>
ANTLR_UINT32
InputStream<ImplTraits>::size()
{
    return this->m_sizeBuf;
}

// Returns the current read pointer converted to an ANTLR_MARKER value.
//
template<class ImplTraits>
ANTLR_MARKER
InputStream<ImplTraits>::index_impl()
{
    return reinterpret_cast<ANTLR_MARKER>(this->m_nextChar);
}


// Builds a string from the input between two markers, both inclusive.
//
// start, stop : marker values that are reinterpreted as addresses; the
//               element count is (stop - start) / sizeof(DataType) + 1.
//
template<class ImplTraits>
typename InputStream<ImplTraits>::StringType
InputStream<ImplTraits>::substr(ANTLR_MARKER start, ANTLR_MARKER stop)
{
    // Number of elements covered by [start, stop].
    //
    const std::size_t count = static_cast<std::size_t>( (stop-start)/sizeof(DataType) + 1 );

    // The start marker is an address in disguise.
    //
    const char* const first = reinterpret_cast<const char*>(start);

    return StringType( first, count );
}

// Non-const overload: returns the current line number (m_line).
//
template<class ImplTraits>
ANTLR_UINT32
InputStream<ImplTraits>::get_line()
{
    return this->m_line;
}

// Returns the current-line pointer (m_currentLine).
//
template<class ImplTraits>
const typename InputStream<ImplTraits>::DataType*
InputStream<ImplTraits>::getLineBuf()
{
    return this->m_currentLine;
}

// Non-const overload: returns the character position within the current
// line, converted to ANTLR_UINT32.
//
template<class ImplTraits>
ANTLR_INLINE ANTLR_UINT32
InputStream<ImplTraits>::get_charPositionInLine()
{
    return this->m_charPositionInLine;
}

// Overwrites the character position within the current line
// (unsigned overload).
//
template<class ImplTraits>
ANTLR_INLINE void
InputStream<ImplTraits>::set_charPositionInLine(ANTLR_UINT32 position)
{
    this->m_charPositionInLine = position;
}

// Installs the line separator character; genericSetupStream() uses this to
// install '\n' as the default.
//
template<class ImplTraits>
void
InputStream<ImplTraits>::set_newLineChar(ANTLR_UINT32 newlineChar)
{
    this->m_newlineChar = newlineChar;
}

// Creates an empty lexer state: both pointers are NULL and both counters
// are zero.
//
template<class ImplTraits>
ANTLR_INLINE LexState<ImplTraits>::LexState()
{
    // Position pointers
    //
    this->m_nextChar    = NULL;
    this->m_currentLine = NULL;

    // Counters
    //
    this->m_line               = 0;
    this->m_charPositionInLine = 0;
}

// Returns the saved read pointer (m_nextChar).
//
template<class ImplTraits>
ANTLR_INLINE const typename LexState<ImplTraits>::DataType*
LexState<ImplTraits>::get_nextChar() const
{
    return this->m_nextChar;
}

// Returns the saved line number (m_line).
//
template<class ImplTraits>
ANTLR_INLINE ANTLR_UINT32
LexState<ImplTraits>::get_line() const
{
    return this->m_line;
}

// Returns the saved current-line pointer (m_currentLine).
//
template<class ImplTraits>
ANTLR_INLINE const typename LexState<ImplTraits>::DataType*
LexState<ImplTraits>::get_currentLine() const
{
    return this->m_currentLine;
}

// Returns the saved character position within the line
// (m_charPositionInLine).
//
template<class ImplTraits>
ANTLR_INLINE ANTLR_INT32
LexState<ImplTraits>::get_charPositionInLine() const
{
    return this->m_charPositionInLine;
}

// Stores the read pointer in this state.
//
template<class ImplTraits>
ANTLR_INLINE void
LexState<ImplTraits>::set_nextChar( const DataType* nextChar )
{
    this->m_nextChar = nextChar;
}

// Stores the line number in this state.
//
template<class ImplTraits>
ANTLR_INLINE void
LexState<ImplTraits>::set_line( ANTLR_UINT32 line )
{
    this->m_line = line;
}

// Stores the current-line pointer in this state.
//
template<class ImplTraits>
ANTLR_INLINE void
LexState<ImplTraits>::set_currentLine( const DataType* currentLine )
{
    this->m_currentLine = currentLine;
}

// Stores the character position within the line in this state.
//
template<class ImplTraits>
ANTLR_INLINE void
LexState<ImplTraits>::set_charPositionInLine( ANTLR_INT32 charPositionInLine )
{
    this->m_charPositionInLine = charPositionInLine;
}

// Returns this object viewed through its IntStreamType interface.
//
template<class ImplTraits>
ANTLR_INLINE typename InputStream<ImplTraits>::IntStreamType*
InputStream<ImplTraits>::get_istream()
{
    IntStreamType* const asIntStream = this;
    return asIntStream;
}

// Finishes stream construction according to m_encoding.
//
// For every encoding this calls setupIntStream(machineIsBigEndian,
// inputIsBigEndian) and records the character byte size (1, 2 or 4; 0 for
// UTF-8). For ENC_UTF8, ENC_UTF16 and ENC_UTF32 it also looks for a byte
// order mark at the current read position and steps over it when present.
// Unknown encodings are treated like ENC_8BIT.
//
// NOTE(review): the BOM probes read up to four bytes from the read position
// without consulting m_sizeBuf; confirm that callers never pass shorter
// buffers.
//
template<class ImplTraits>
void InputStream<ImplTraits>::setupInputStream()
{
    // Determine the endianness of the machine we are running on by looking
    // at the first byte of a known 16 bit value. If the incoming encoding
    // has the same byte order as this machine then more efficient API calls
    // can be used.
    //
    const ANTLR_UINT16 bomTest = 0xFEFF;
    const bool isBigEndian = (*reinterpret_cast<const ANTLR_UINT8*>(&bomTest) == 0xFE);

    // Byte-wise view of the current read position, used by the UTF-16 and
    // UTF-32 BOM probes below. It is only dereferenced inside those cases.
    //
    const ANTLR_UINT8* const bom = reinterpret_cast<const ANTLR_UINT8*>(m_nextChar);

    // What encoding did the user tell us it was?
    //
    switch  (m_encoding)
    {
        case    ENC_UTF8:

            // See if there is a BOM at the start of this UTF-8 sequence
            // and just eat it if there is. Windows .TXT files have this for
            // instance, as it identifies UTF-8 even though it is of no
            // consequence for byte order (UTF-8 does not have a byte order).
            //
            if  (       (*(m_nextChar))      == 0xEF
                    &&  (*(m_nextChar+1))    == 0xBB
                    &&  (*(m_nextChar+2))    == 0xBF
                )
            {
                // The UTF-8 BOM is present so skip it
                //
                m_nextChar += 3;
            }

            // Install the UTF-8 input routines; a byte size of 0 is recorded
            // for this encoding.
            //
            this->setupIntStream( isBigEndian, isBigEndian );
            this->set_charByteSize(0);
            break;

        case    ENC_UTF16:

            // See if there is a BOM at the start of the input. If not then
            // we assume that the byte order is the natural order of this
            // machine (or it is really UCS2). If there is a BOM we determine
            // if the encoding is the same as the natural order of this
            // machine.
            //
            // NOTE(review): the read pointer is advanced by ONE DataType
            // element past the BOM; this skips the whole BOM only if
            // DataType is as wide as the code unit -- confirm.
            //
            if  (bom[0] == 0xFE && bom[1] == 0xFF)
            {
                // BOM present, indicates Big Endian
                //
                m_nextChar += 1;

                this->setupIntStream( isBigEndian, true );
            }
            else if  (bom[0] == 0xFF && bom[1] == 0xFE)
            {
                // BOM present, indicates Little Endian
                //
                m_nextChar += 1;

                this->setupIntStream( isBigEndian, false );
            }
            else
            {
                // No BOM present, assume local computer byte order
                //
                this->setupIntStream( isBigEndian, isBigEndian );
            }
            this->set_charByteSize(2);
            break;

        case    ENC_UTF32:

            // See if there is a BOM at the start of the input. If not then
            // we assume that the byte order is the natural order of this
            // machine. If there is we determine if the encoding is the same
            // as the natural order of this machine.
            //
            // NOTE(review): as for UTF-16, the read pointer is advanced by
            // one DataType element -- confirm this covers the 4 byte BOM.
            //
            if  (       bom[0] == 0x00
                    &&  bom[1] == 0x00
                    &&  bom[2] == 0xFE
                    &&  bom[3] == 0xFF
                )
            {
                // BOM present (00 00 FE FF), indicates Big Endian
                //
                m_nextChar += 1;

                this->setupIntStream( isBigEndian, true );
            }
            else if  (      bom[0] == 0xFF
                        &&  bom[1] == 0xFE
                        &&  bom[2] == 0x00
                        &&  bom[3] == 0x00
                )
            {
                // BOM present (FF FE 00 00), indicates Little Endian.
                // All four bytes must be examined at their own offsets;
                // testing byte 1 against both 0xFE and 0x00 can never match.
                //
                m_nextChar += 1;

                this->setupIntStream( isBigEndian, false );
            }
            else
            {
                // No BOM present, assume local computer byte order
                //
                this->setupIntStream( isBigEndian, isBigEndian );
            }
            this->set_charByteSize(4);
            break;

        case    ENC_UTF16BE:

            // Encoding is definitely Big Endian with no BOM
            //
            this->setupIntStream( isBigEndian, true );
            this->set_charByteSize(2);
            break;

        case    ENC_UTF16LE:

            // Encoding is definitely Little Endian with no BOM
            //
            this->setupIntStream( isBigEndian, false );
            this->set_charByteSize(2);
            break;

        case    ENC_UTF32BE:

            // Encoding is definitely Big Endian with no BOM
            //
            this->setupIntStream( isBigEndian, true );
            this->set_charByteSize(4);
            break;

        case    ENC_UTF32LE:

            // Encoding is definitely Little Endian with no BOM
            //
            this->setupIntStream( isBigEndian, false );
            this->set_charByteSize(4);
            break;

        case    ENC_EBCDIC:

            // EBCDIC is handled like the single byte case here: one byte
            // per character, machine byte order.
            //
            this->setupIntStream( isBigEndian, isBigEndian );
            this->set_charByteSize(1);
            break;

        case    ENC_8BIT:
        default:

            // Standard 8bit/ASCII
            //
            this->setupIntStream( isBigEndian, isBigEndian );
            this->set_charByteSize(1);
            break;
    }
}

}