12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661 |
- namespace antlr3 {
- template<class ImplTraits, class SuperType>
- ANTLR_INLINE IntStream<ImplTraits, SuperType>::IntStream()
- {
- m_lastMarker = 0;
- m_upper_case = false;
- }
- template<class ImplTraits, class SuperType>
- ANTLR_INLINE typename IntStream<ImplTraits, SuperType>::StringType IntStream<ImplTraits, SuperType>::getSourceName()
- {
- return m_streamName;
- }
- template<class ImplTraits, class SuperType>
- ANTLR_INLINE typename IntStream<ImplTraits, SuperType>::StringType& IntStream<ImplTraits, SuperType>::get_streamName()
- {
- return m_streamName;
- }
- template<class ImplTraits, class SuperType>
- ANTLR_INLINE const typename IntStream<ImplTraits, SuperType>::StringType& IntStream<ImplTraits, SuperType>::get_streamName() const
- {
- return m_streamName;
- }
- template<class ImplTraits, class SuperType>
- ANTLR_INLINE ANTLR_MARKER IntStream<ImplTraits, SuperType>::get_lastMarker() const
- {
- return m_lastMarker;
- }
- template<class ImplTraits, class SuperType>
- ANTLR_INLINE void IntStream<ImplTraits, SuperType>::setUcaseLA(bool flag)
- {
- m_upper_case = flag;
- }
- template<class ImplTraits, class SuperType>
- ANTLR_INLINE SuperType* IntStream<ImplTraits, SuperType>::get_super()
- {
- return static_cast<SuperType*>(this);
- }
- template<class ImplTraits, class SuperType>
- void IntStream<ImplTraits, SuperType>::consume()
- {
- SuperType* input = this->get_super();
- const ANTLR_UINT8* nextChar = input->get_nextChar();
- const ANTLR_UINT8* data = input->get_data();
- ANTLR_UINT32 sizeBuf = input->get_sizeBuf();
- if ( nextChar < ( data + sizeBuf ) )
- {
- /* Indicate one more character in this line
- */
- input->inc_charPositionInLine();
-
- if ((ANTLR_UCHAR)(*(nextChar)) == input->get_newlineChar() )
- {
- /* Reset for start of a new line of input
- */
- input->inc_line();
- input->set_charPositionInLine(0);
- input->set_currentLine(nextChar + 1);
- }
- /* Increment to next character position
- */
- input->set_nextChar( nextChar + 1 );
- }
- }
- template<class ImplTraits, class SuperType>
- ANTLR_UINT32 IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la )
- {
- SuperType* input = this->get_super();
- const ANTLR_UINT8* nextChar = input->get_nextChar();
- const ANTLR_UINT8* data = input->get_data();
- ANTLR_UINT32 sizeBuf = input->get_sizeBuf();
- if (( nextChar + la - 1) >= (data + sizeBuf))
- {
- return ANTLR_CHARSTREAM_EOF;
- }
- else
- {
- if( !m_upper_case )
- return (ANTLR_UCHAR)(*(nextChar + la - 1));
- else
- return (ANTLR_UCHAR)toupper(*(nextChar + la - 1));
- }
- }
- template<class ImplTraits, class SuperType>
- ANTLR_MARKER IntStream<ImplTraits, SuperType>::mark()
- {
- LexState<ImplTraits>* state;
- SuperType* input = this->get_super();
- /* New mark point
- */
- input->inc_markDepth();
- /* See if we are revisiting a mark as we can just reuse the vector
- * entry if we are, otherwise, we need a new one
- */
- if (input->get_markDepth() > input->get_markers().size() )
- {
- input->get_markers().push_back( LexState<ImplTraits>() );
- LexState<ImplTraits>& state_r = input->get_markers().back();
- state = &state_r;
- }
- else
- {
- LexState<ImplTraits>& state_r = input->get_markers().at( input->get_markDepth() - 1 );
- state = &state_r;
- /* Assume no errors for speed, it will just blow up if the table failed
- * for some reasons, hence lots of unit tests on the tables ;-)
- */
- }
- /* We have created or retrieved the state, so update it with the current
- * elements of the lexer state.
- */
- state->set_charPositionInLine( input->get_charPositionInLine() );
- state->set_currentLine( input->get_currentLine() );
- state->set_line( input->get_line() );
- state->set_nextChar( input->get_nextChar() );
- m_lastMarker = input->get_markDepth();
- /* And that's it
- */
- return input->get_markDepth();
- }
- template<class ImplTraits, class SuperType>
- ANTLR_MARKER IntStream<ImplTraits, SuperType>::index()
- {
- SuperType* input = this->get_super();
- return input->index_impl();
- }
- template<class ImplTraits, class SuperType>
- void IntStream<ImplTraits, SuperType>::rewind(ANTLR_MARKER mark)
- {
- SuperType* input = this->get_super();
- /* Perform any clean up of the marks
- */
- this->release(mark);
- /* Find the supplied mark state
- */
- ANTLR_UINT32 idx = static_cast<ANTLR_UINT32>( mark-1 );
- typename ImplTraits::LexStateType& state = input->get_markers().at( idx );
- /* Seek input pointer to the requested point (note we supply the void *pointer
- * to whatever is implementing the int stream to seek).
- */
- this->seek( (ANTLR_MARKER)state.get_nextChar() );
-
- /* Reset to the reset of the information in the mark
- */
- input->set_charPositionInLine( state.get_charPositionInLine() );
- input->set_currentLine( state.get_currentLine() );
- input->set_line( state.get_line() );
- input->set_nextChar( state.get_nextChar() );
- /* And we are done
- */
- }
- template<class ImplTraits, class SuperType>
- void IntStream<ImplTraits, SuperType>::rewindLast()
- {
- this->rewind(m_lastMarker);
- }
- template<class ImplTraits, class SuperType>
- void IntStream<ImplTraits, SuperType>::release(ANTLR_MARKER mark)
- {
- SuperType* input = this->get_super();
- /* We don't do much here in fact as we never free any higher marks in
- * the hashtable as we just resuse any memory allocated for them.
- */
- input->set_markDepth( (ANTLR_UINT32)(mark - 1) );
- }
- template<class ImplTraits, class SuperType>
- void IntStream<ImplTraits, SuperType>::setupIntStream(bool, bool)
- {
- }
- template<class ImplTraits, class SuperType>
- void IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint)
- {
- ANTLR_INT32 count;
- SuperType* input = this->get_super();
- ANTLR_MARKER nextChar = (ANTLR_MARKER) input->get_nextChar();
- /* If the requested seek point is less than the current
- * input point, then we assume that we are resetting from a mark
- * and do not need to scan, but can just set to there.
- */
- if (seekPoint <= nextChar)
- {
- input->set_nextChar((ANTLR_UINT8*) seekPoint);
- }
- else
- {
- count = (ANTLR_UINT32)(seekPoint - nextChar);
- while (count--)
- {
- this->consume();
- }
- }
- }
- template<class ImplTraits, class SuperType>
- IntStream<ImplTraits, SuperType>::~IntStream()
- {
- }
- template<class ImplTraits, class SuperType>
- ANTLR_UINT32 EBCDIC_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la)
- {
- // EBCDIC to ASCII conversion table
- //
- // This for EBCDIC EDF04 translated to ISO-8859.1 which is the usually accepted POSIX
- // translation and the character tables are published all over the interweb.
- //
- const ANTLR_UCHAR e2a[256] =
- {
- 0x00, 0x01, 0x02, 0x03, 0x85, 0x09, 0x86, 0x7f,
- 0x87, 0x8d, 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
- 0x10, 0x11, 0x12, 0x13, 0x8f, 0x0a, 0x08, 0x97,
- 0x18, 0x19, 0x9c, 0x9d, 0x1c, 0x1d, 0x1e, 0x1f,
- 0x80, 0x81, 0x82, 0x83, 0x84, 0x92, 0x17, 0x1b,
- 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07,
- 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04,
- 0x98, 0x99, 0x9a, 0x9b, 0x14, 0x15, 0x9e, 0x1a,
- 0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1, 0xe3, 0xe5,
- 0xe7, 0xf1, 0x60, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
- 0x26, 0xe9, 0xea, 0xeb, 0xe8, 0xed, 0xee, 0xef,
- 0xec, 0xdf, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x9f,
- 0x2d, 0x2f, 0xc2, 0xc4, 0xc0, 0xc1, 0xc3, 0xc5,
- 0xc7, 0xd1, 0x5e, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
- 0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf,
- 0xcc, 0xa8, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
- 0xd8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
- 0x68, 0x69, 0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1,
- 0xb0, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70,
- 0x71, 0x72, 0xaa, 0xba, 0xe6, 0xb8, 0xc6, 0xa4,
- 0xb5, 0xaf, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
- 0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0xdd, 0xde, 0xae,
- 0xa2, 0xa3, 0xa5, 0xb7, 0xa9, 0xa7, 0xb6, 0xbc,
- 0xbd, 0xbe, 0xac, 0x5b, 0x5c, 0x5d, 0xb4, 0xd7,
- 0xf9, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
- 0x48, 0x49, 0xad, 0xf4, 0xf6, 0xf2, 0xf3, 0xf5,
- 0xa6, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
- 0x51, 0x52, 0xb9, 0xfb, 0xfc, 0xdb, 0xfa, 0xff,
- 0xd9, 0xf7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
- 0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2, 0xd3, 0xd5,
- 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
- 0x38, 0x39, 0xb3, 0x7b, 0xdc, 0x7d, 0xda, 0x7e
- };
- SuperType* input = this->get_super();
- if (( input->get_nextChar() + la - 1) >= ( input->get_data() + input->get_sizeBuf() ))
- {
- return ANTLR_CHARSTREAM_EOF;
- }
- else
- {
- // Translate the required character via the constant conversion table
- //
- return e2a[(*(input->get_nextChar() + la - 1))];
- }
- }
- template<class ImplTraits, class SuperType>
- void EBCDIC_IntStream<ImplTraits, SuperType>::setupIntStream()
- {
- SuperType* super = this->get_super();
- super->set_charByteSize(1);
- }
- template<class ImplTraits, class SuperType>
- ANTLR_UINT32 UTF16_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 i)
- {
- return this->LA(i, ClassForwarder< typename ImplTraits::Endianness >() );
- }
- template<class ImplTraits, class SuperType>
- void UTF16_IntStream<ImplTraits, SuperType>::consume()
- {
- this->consume( ClassForwarder< typename ImplTraits::Endianness >() );
- }
- template<class ImplTraits, class SuperType>
- ANTLR_MARKER UTF16_IntStream<ImplTraits, SuperType>::index()
- {
- SuperType* input = this->get_super();
- return (ANTLR_MARKER)(input->get_nextChar());
- }
- template<class ImplTraits, class SuperType>
- void UTF16_IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint)
- {
- SuperType* input = this->get_super();
- // If the requested seek point is less than the current
- // input point, then we assume that we are resetting from a mark
- // and do not need to scan, but can just set to there as rewind will
- // reset line numbers and so on.
- //
- if (seekPoint <= (ANTLR_MARKER)(input->get_nextChar()))
- {
- input->set_nextChar( seekPoint );
- }
- else
- {
- // Call consume until we reach the asked for seek point or EOF
- //
- while( (this->LA(1) != ANTLR_CHARSTREAM_EOF) && (seekPoint < (ANTLR_MARKER)input->get_nextChar() ) )
- {
- this->consume();
- }
- }
- }
- template<class ImplTraits, class SuperType>
- void IntStream<ImplTraits, SuperType>::findout_endian_spec(bool machineBigEndian, bool inputBigEndian)
- {
- // We must install different UTF16 routines according to whether the input
- // is the same endianess as the machine we are executing upon or not. If it is not
- // then we must install methods that can convert the endianess on the fly as they go
- //
- if(machineBigEndian == true)
- {
- // Machine is Big Endian, if the input is also then install the
- // methods that do not access input by bytes and reverse them.
- // Otherwise install endian aware methods.
- //
- if (inputBigEndian == true)
- {
- // Input is machine compatible
- //
- m_endian_spec = 1;
- }
- else
- {
- // Need to use methods that know that the input is little endian
- //
- m_endian_spec = 2;
- }
- }
- else
- {
- // Machine is Little Endian, if the input is also then install the
- // methods that do not access input by bytes and reverse them.
- // Otherwise install endian aware methods.
- //
- if (inputBigEndian == false)
- {
- // Input is machine compatible
- //
- m_endian_spec = 1;
- }
- else
- {
- // Need to use methods that know that the input is Big Endian
- //
- m_endian_spec = 3;
- }
- }
- }
- template<class ImplTraits, class SuperType>
- void UTF16_IntStream<ImplTraits, SuperType>::setupIntStream(bool machineBigEndian, bool inputBigEndian)
- {
- SuperType* super = this->get_super();
- super->set_charByteSize(2);
- this->findout_endian_spec( machineBigEndian, inputBigEndian );
- }
- template<class ImplTraits, class SuperType>
- ANTLR_UINT32 IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 i, ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> )
- {
- assert( (m_endian_spec >= 1) && (m_endian_spec <= 3));
- switch(m_endian_spec)
- {
- case 1:
- return this->LA(i, ClassForwarder<BYTE_AGNOSTIC>() );
- break;
- case 2:
- return this->LA(i, ClassForwarder<ANTLR_LITTLE_ENDIAN>() );
- break;
- case 3:
- return this->LA(i, ClassForwarder<ANTLR_BIG_ENDIAN>() );
- break;
- default:
- break;
- }
- return 0;
- }
- template<class ImplTraits, class SuperType>
- void IntStream<ImplTraits, SuperType>::consume( ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> )
- {
- assert( (m_endian_spec >= 1) && (m_endian_spec <= 3));
- switch(m_endian_spec)
- {
- case 1:
- this->consume( ClassForwarder<BYTE_AGNOSTIC>() );
- break;
- case 2:
- this->consume( ClassForwarder<ANTLR_LITTLE_ENDIAN>() );
- break;
- case 3:
- this->consume( ClassForwarder<ANTLR_BIG_ENDIAN>() );
- break;
- default:
- break;
- }
- }
- template<class ImplTraits, class SuperType>
- ANTLR_UINT32 UTF16_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la, ClassForwarder<BYTE_AGNOSTIC> )
- {
- SuperType* input;
- UTF32 ch;
- UTF32 ch2;
- UTF16* nextChar;
- // Find the input interface and where we are currently pointing to
- // in the input stream
- //
- input = this->get_super;
- nextChar = input->get_nextChar();
- // If a positive offset then advance forward, else retreat
- //
- if (la >= 0)
- {
- while (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )
- {
- // Advance our copy of the input pointer
- //
- // Next char in natural machine byte order
- //
- ch = *nextChar++;
- // If we have a surrogate pair then we need to consume
- // a following valid LO surrogate.
- //
- if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
- {
- // If the 16 bits following the high surrogate are in the source buffer...
- //
- if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ))
- {
- // Next character is in natural machine byte order
- //
- ch2 = *nextChar;
- // If it's a valid low surrogate, consume it
- //
- if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
- {
- // We consumed one 16 bit character
- //
- nextChar++;
- }
- // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
- // it.
- //
- }
- // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
- // it because the buffer ended
- //
- }
- // Note that we did not check for an invalid low surrogate here, or that fact that the
- // lo surrogate was missing. We just picked out one 16 bit character unless the character
- // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
- //
- }
- }
- else
- {
- // We need to go backwards from our input point
- //
- while (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() )
- {
- // Get the previous 16 bit character
- //
- ch = *--nextChar;
- // If we found a low surrogate then go back one more character if
- // the hi surrogate is there
- //
- if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
- {
- ch2 = *(nextChar-1);
- if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END)
- {
- // Yes, there is a high surrogate to match it so decrement one more and point to that
- //
- nextChar--;
- }
- }
- }
- }
- // Our local copy of nextChar is now pointing to either the correct character or end of file
- //
- // Input buffer size is always in bytes
- //
- if ( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ))
- {
- return ANTLR_CHARSTREAM_EOF;
- }
- else
- {
- // Pick up the next 16 character (native machine byte order)
- //
- ch = *nextChar++;
- // If we have a surrogate pair then we need to consume
- // a following valid LO surrogate.
- //
- if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
- {
- // If the 16 bits following the high surrogate are in the source buffer...
- //
- if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
- {
- // Next character is in natural machine byte order
- //
- ch2 = *nextChar;
- // If it's a valid low surrogate, consume it
- //
- if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
- {
- // Construct the UTF32 code point
- //
- ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
- + (ch2 - UNI_SUR_LOW_START) + halfBase;
- }
- // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
- // it.
- //
- }
- // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
- // it because the buffer ended
- //
- }
- }
- return ch;
- }
- template<class ImplTraits, class SuperType>
- ANTLR_UINT32 UTF16_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la, ClassForwarder<ANTLR_LITTLE_ENDIAN> )
- {
- SuperType* input;
- UTF32 ch;
- UTF32 ch2;
- ANTLR_UCHAR* nextChar;
- // Find the input interface and where we are currently pointing to
- // in the input stream
- //
- input = this->get_super();
- nextChar = input->get_nextChar();
- // If a positive offset then advance forward, else retreat
- //
- if (la >= 0)
- {
- while (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )
- {
- // Advance our copy of the input pointer
- //
- // Next char in Little Endian byte order
- //
- ch = (*nextChar) + (*(nextChar+1) << 8);
- nextChar += 2;
- // If we have a surrogate pair then we need to consume
- // a following valid LO surrogate.
- //
- if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
- {
- // If the 16 bits following the high surrogate are in the source buffer...
- //
- if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ))
- {
- // Next character is in little endian byte order
- //
- ch2 = (*nextChar) + (*(nextChar+1) << 8);
- // If it's a valid low surrogate, consume it
- //
- if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
- {
- // We consumed one 16 bit character
- //
- nextChar += 2;
- }
- // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
- // it.
- //
- }
- // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
- // it because the buffer ended
- //
- }
- // Note that we did not check for an invalid low surrogate here, or that fact that the
- // lo surrogate was missing. We just picked out one 16 bit character unless the character
- // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
- //
- }
- }
- else
- {
- // We need to go backwards from our input point
- //
- while (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() )
- {
- // Get the previous 16 bit character
- //
- ch = (*nextChar - 2) + ((*nextChar -1) << 8);
- nextChar -= 2;
- // If we found a low surrogate then go back one more character if
- // the hi surrogate is there
- //
- if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
- {
- ch2 = (*nextChar - 2) + ((*nextChar -1) << 8);
- if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END)
- {
- // Yes, there is a high surrogate to match it so decrement one more and point to that
- //
- nextChar -=2;
- }
- }
- }
- }
- // Our local copy of nextChar is now pointing to either the correct character or end of file
- //
- // Input buffer size is always in bytes
- //
- if ( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
- {
- return ANTLR_CHARSTREAM_EOF;
- }
- else
- {
- // Pick up the next 16 character (little endian byte order)
- //
- ch = (*nextChar) + (*(nextChar+1) << 8);
- nextChar += 2;
- // If we have a surrogate pair then we need to consume
- // a following valid LO surrogate.
- //
- if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
- {
- // If the 16 bits following the high surrogate are in the source buffer...
- //
- if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
- {
- // Next character is in little endian byte order
- //
- ch2 = (*nextChar) + (*(nextChar+1) << 8);
- // If it's a valid low surrogate, consume it
- //
- if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
- {
- // Construct the UTF32 code point
- //
- ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
- + (ch2 - UNI_SUR_LOW_START) + halfBase;
- }
- // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
- // it.
- //
- }
- // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
- // it because the buffer ended
- //
- }
- }
- return ch;
- }
- template<class ImplTraits, class SuperType>
- ANTLR_UINT32 UTF16_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la, ClassForwarder<ANTLR_BIG_ENDIAN> )
- {
- SuperType* input;
- UTF32 ch;
- UTF32 ch2;
- ANTLR_UCHAR* nextChar;
- // Find the input interface and where we are currently pointing to
- // in the input stream
- //
- input = this->get_super();
- nextChar = input->get_nextChar();
- // If a positive offset then advance forward, else retreat
- //
- if (la >= 0)
- {
- while (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )
- {
- // Advance our copy of the input pointer
- //
- // Next char in Big Endian byte order
- //
- ch = ((*nextChar) << 8) + *(nextChar+1);
- nextChar += 2;
- // If we have a surrogate pair then we need to consume
- // a following valid LO surrogate.
- //
- if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
- {
- // If the 16 bits following the high surrogate are in the source buffer...
- //
- if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
- {
- // Next character is in big endian byte order
- //
- ch2 = ((*nextChar) << 8) + *(nextChar+1);
- // If it's a valid low surrogate, consume it
- //
- if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
- {
- // We consumed one 16 bit character
- //
- nextChar += 2;
- }
- // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
- // it.
- //
- }
- // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
- // it because the buffer ended
- //
- }
- // Note that we did not check for an invalid low surrogate here, or that fact that the
- // lo surrogate was missing. We just picked out one 16 bit character unless the character
- // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
- //
- }
- }
- else
- {
- // We need to go backwards from our input point
- //
- while (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() )
- {
- // Get the previous 16 bit character
- //
- ch = ((*nextChar - 2) << 8) + (*nextChar -1);
- nextChar -= 2;
- // If we found a low surrogate then go back one more character if
- // the hi surrogate is there
- //
- if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
- {
- ch2 = ((*nextChar - 2) << 8) + (*nextChar -1);
- if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END)
- {
- // Yes, there is a high surrogate to match it so decrement one more and point to that
- //
- nextChar -=2;
- }
- }
- }
- }
- // Our local copy of nextChar is now pointing to either the correct character or end of file
- //
- // Input buffer size is always in bytes
- //
- if ( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
- {
- return ANTLR_CHARSTREAM_EOF;
- }
- else
- {
- // Pick up the next 16 character (big endian byte order)
- //
- ch = ((*nextChar) << 8) + *(nextChar+1);
- nextChar += 2;
- // If we have a surrogate pair then we need to consume
- // a following valid LO surrogate.
- //
- if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
- {
- // If the 16 bits following the high surrogate are in the source buffer...
- //
- if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
- {
- // Next character is in big endian byte order
- //
- ch2 = ((*nextChar) << 8) + *(nextChar+1);
- // If it's a valid low surrogate, consume it
- //
- if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
- {
- // Construct the UTF32 code point
- //
- ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
- + (ch2 - UNI_SUR_LOW_START) + halfBase;
- }
- // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
- // it.
- //
- }
- // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
- // it because the buffer ended
- //
- }
- }
- return ch;
- }
- template<class ImplTraits, class SuperType>
- void UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<BYTE_AGNOSTIC> )
- {
- SuperType* input;
- UTF32 ch;
- UTF32 ch2;
- input = this->get_super();
- // Buffer size is always in bytes
- //
- if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
- {
- // Indicate one more character in this line
- //
- input->inc_charPositionInLine();
- if ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar())
- {
- // Reset for start of a new line of input
- //
- input->inc_line();
- input->set_charPositionInLine(0);
- input->set_currentLine( input->get_nextChar() + 1 );
- }
- // Increment to next character position, accounting for any surrogates
- //
- // Next char in natural machine byte order
- //
- ch = *(input->get_nextChar());
- // We consumed one 16 bit character
- //
- input->set_nextChar( input->get_nextChar() + 1 );
- // If we have a surrogate pair then we need to consume
- // a following valid LO surrogate.
- //
- if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
- // If the 16 bits following the high surrogate are in the source buffer...
- //
- if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
- {
- // Next character is in natural machine byte order
- //
- ch2 = *(input->get_nextChar());
- // If it's a valid low surrogate, consume it
- //
- if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
- {
- // We consumed one 16 bit character
- //
- input->set_nextChar( input->get_nextChar() + 1 );
- }
- // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
- // it.
- //
- }
- // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
- // it because the buffer ended
- //
- }
- // Note that we did not check for an invalid low surrogate here, or that fact that the
- // lo surrogate was missing. We just picked out one 16 bit character unless the character
- // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
- //
- }
- }
- /// Consume one character from a UTF-16 stream whose 16 bit units are stored
- /// low byte first, independent of the byte order of the host.
- ///
- /// Advances the next-character pointer by one 16 bit unit, or by two when a
- /// high surrogate is directly followed by a low surrogate inside the buffer.
- /// The column counter is incremented; when the consumed unit equals the
- /// stream's newline character the line counter is incremented, the column is
- /// reset to 0 and the current-line pointer is set to the unit after it.
- /// Does nothing once the pointer has reached the end of the buffer.
- ///
- template<class ImplTraits, class SuperType>
- void UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<ANTLR_LITTLE_ENDIAN> )
- {
-     SuperType* input = this->get_super();
-     // Buffer size is always in bytes, so halve it to count 16 bit units
-     //
-     if (input->get_nextChar() >= (input->get_data() + input->get_sizeBuf()/2))
-     {
-         return;
-     }
-     // Assemble the unit from its two bytes, low byte first. A plain 16 bit
-     // load would use the host byte order rather than the order of the data.
-     //
-     const UTF32 ch = *((ANTLR_UINT8*)input->get_nextChar()) + (*((ANTLR_UINT8*)input->get_nextChar() + 1) <<8);
-     // Indicate one more character in this line
-     //
-     input->inc_charPositionInLine();
-     // Compare the decoded value (not the raw storage) with the newline character
-     //
-     if ((ANTLR_UCHAR)ch == input->get_newlineChar())
-     {
-         // Reset for start of a new line of input
-         //
-         input->inc_line();
-         input->set_charPositionInLine(0);
-         input->set_currentLine(input->get_nextChar() + 1);
-     }
-     // We consumed one 16 bit character
-     //
-     input->set_nextChar( input->get_nextChar() + 1);
-     // A high surrogate may be followed by a low surrogate that belongs to it
-     //
-     if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
-     {
-         // Only look at the following unit if it is still inside the buffer
-         //
-         if (input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2))
-         {
-             const UTF32 ch2 = *((ANTLR_UINT8*)input->get_nextChar()) + (*((ANTLR_UINT8*)input->get_nextChar() + 1) <<8);
-             // If it's a valid low surrogate, consume it as well
-             //
-             if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
-             {
-                 input->set_nextChar( input->get_nextChar() + 1);
-             }
-             // A high surrogate without a matching low surrogate is left as is
-             //
-         }
-     }
-     // Lone or invalid low surrogates are not diagnosed here: exactly one unit
-     // is consumed unless a valid high/low pair was found.
-     //
- }
- /// Consume one character from a UTF-16 stream whose 16 bit units are stored
- /// high byte first, independent of the byte order of the host.
- ///
- /// Advances the next-character pointer by one 16 bit unit, or by two when a
- /// high surrogate is directly followed by a low surrogate inside the buffer.
- /// The column counter is incremented; when the consumed unit equals the
- /// stream's newline character the line counter is incremented, the column is
- /// reset to 0 and the current-line pointer is set to the unit after it.
- /// Does nothing once the pointer has reached the end of the buffer.
- ///
- template<class ImplTraits, class SuperType>
- void UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<ANTLR_BIG_ENDIAN> )
- {
-     SuperType* input = this->get_super();
-     // Buffer size is always in bytes, so halve it to count 16 bit units
-     //
-     if (input->get_nextChar() >= (input->get_data() + input->get_sizeBuf()/2))
-     {
-         return;
-     }
-     // Assemble the unit from its two bytes, high byte first. A plain 16 bit
-     // load would use the host byte order rather than the order of the data.
-     //
-     const UTF32 ch = *((ANTLR_UINT8*)input->get_nextChar() + 1) + (*((ANTLR_UINT8*)input->get_nextChar() ) <<8);
-     // Indicate one more character in this line
-     //
-     input->inc_charPositionInLine();
-     // Compare the decoded value (not the raw storage) with the newline character
-     //
-     if ((ANTLR_UCHAR)ch == input->get_newlineChar())
-     {
-         // Reset for start of a new line of input
-         //
-         input->inc_line();
-         input->set_charPositionInLine(0);
-         input->set_currentLine(input->get_nextChar() + 1);
-     }
-     // We consumed one 16 bit character
-     //
-     input->set_nextChar( input->get_nextChar() + 1);
-     // A high surrogate may be followed by a low surrogate that belongs to it
-     //
-     if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
-     {
-         // Only look at the following unit if it is still inside the buffer
-         //
-         if (input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2))
-         {
-             const UTF32 ch2 = *((ANTLR_UINT8*)input->get_nextChar() + 1) + (*((ANTLR_UINT8*)input->get_nextChar() ) <<8);
-             // If it's a valid low surrogate, consume it as well
-             //
-             if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
-             {
-                 input->set_nextChar( input->get_nextChar() + 1);
-             }
-             // A high surrogate without a matching low surrogate is left as is
-             //
-         }
-     }
-     // Lone or invalid low surrogates are not diagnosed here: exactly one unit
-     // is consumed unless a valid high/low pair was found.
-     //
- }
- /// Look ahead i positions by forwarding to the LA overload selected by the
- /// endianness tag type taken from ImplTraits.
- ///
- template<class ImplTraits, class SuperType>
- ANTLR_UINT32 UTF32_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 i)
- {
-     typedef ClassForwarder<typename ImplTraits::Endianness> EndianTag;
-     return this->LA( i, EndianTag() );
- }
- /// Return the current position as a marker: the owning stream's
- /// next-character pointer converted to ANTLR_MARKER.
- ///
- template<class ImplTraits, class SuperType>
- ANTLR_MARKER UTF32_IntStream<ImplTraits, SuperType>::index()
- {
-     return (ANTLR_MARKER)( this->get_super()->get_nextChar() );
- }
- /// Move the input position to seekPoint, a marker as produced by index().
- ///
- /// A seek to the current position or backwards simply repositions the
- /// next-character pointer; line bookkeeping is left to the rewind logic.
- /// A forward seek consumes characters one at a time, so that line and column
- /// counters stay in step, until the position reaches seekPoint or LA(1)
- /// reports end of stream.
- ///
- template<class ImplTraits, class SuperType>
- void UTF32_IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint)
- {
-     SuperType* input = this->get_super();
-     if (seekPoint <= (ANTLR_MARKER)(input->get_nextChar()))
-     {
-         // The marker is an integer image of a pointer (see index()), so it has
-         // to be converted back with reinterpret_cast rather than static_cast.
-         //
-         input->set_nextChar( reinterpret_cast<typename ImplTraits::DataType*>(seekPoint) );
-         return;
-     }
-     // Call consume until we reach the asked for seek point or EOF. The loop
-     // must run while the current position is still *before* the seek point.
-     //
-     while( (this->LA(1) != ANTLR_CHARSTREAM_EOF) && ((ANTLR_MARKER)input->get_nextChar() < seekPoint) )
-     {
-         this->consume();
-     }
- }
- /// Configure the stream for UTF-32 input: record a character size of 4 bytes
- /// on the owning stream, then let findout_endian_spec() act on the host and
- /// input byte orders.
- ///
- template<class ImplTraits, class SuperType>
- void UTF32_IntStream<ImplTraits, SuperType>::setupIntStream(bool machineBigEndian, bool inputBigEndian)
- {
-     this->get_super()->set_charByteSize(4);
-     this->findout_endian_spec(machineBigEndian, inputBigEndian);
- }
- /// Look ahead in a UTF-32 buffer without any byte reordering.
- ///
- /// Returns ANTLR_CHARSTREAM_EOF when the requested position lies at or beyond
- /// the end of the buffer (the buffer size is in bytes, hence the division by
- /// 4); otherwise returns the 32 bit value stored at offset la - 1 from the
- /// next-character pointer.
- ///
- template<class ImplTraits, class SuperType>
- ANTLR_UINT32 UTF32_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la, ClassForwarder<BYTE_AGNOSTIC> )
- {
-     SuperType* stream = this->get_super();
-     if ((stream->get_data() + stream->get_sizeBuf()/4 ) <= ( stream->get_nextChar() + la - 1))
-     {
-         return ANTLR_CHARSTREAM_EOF;
-     }
-     return (ANTLR_UCHAR)(*(stream->get_nextChar() + la - 1));
- }
- /// Look ahead in a UTF-32 buffer and return the value with its four bytes
- /// in reversed order.
- ///
- /// Returns ANTLR_CHARSTREAM_EOF when the requested position lies at or beyond
- /// the end of the buffer (the buffer size is in bytes, hence the division by
- /// 4); otherwise reads the 32 bit value at offset la - 1 from the
- /// next-character pointer and byte-swaps it.
- ///
- template<class ImplTraits, class SuperType>
- ANTLR_UINT32 UTF32_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la, ClassForwarder<ANTLR_LITTLE_ENDIAN> )
- {
-     SuperType* stream = this->get_super();
-     if ((stream->get_data() + stream->get_sizeBuf()/4 ) <= ( stream->get_nextChar() + la - 1))
-     {
-         return ANTLR_CHARSTREAM_EOF;
-     }
-     const ANTLR_UCHAR raw = (ANTLR_UCHAR)(*(stream->get_nextChar() + la - 1));
-     // Reverse the byte order: byte 0 <-> byte 3, byte 1 <-> byte 2
-     //
-     return (raw<<24) | ((raw>>8) & 0x0000FF00) | ((raw<<8) & 0x00FF0000) | (raw>>24);
- }
- /// Look ahead in a UTF-32 buffer and return the value with its four bytes
- /// in reversed order.
- ///
- /// Returns ANTLR_CHARSTREAM_EOF when the requested position lies at or beyond
- /// the end of the buffer (the buffer size is in bytes, hence the division by
- /// 4); otherwise reads the 32 bit value at offset la - 1 from the
- /// next-character pointer and byte-swaps it.
- ///
- template<class ImplTraits, class SuperType>
- ANTLR_UINT32 UTF32_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la, ClassForwarder<ANTLR_BIG_ENDIAN> )
- {
-     SuperType* stream = this->get_super();
-     if ((stream->get_data() + stream->get_sizeBuf()/4 ) <= ( stream->get_nextChar() + la - 1))
-     {
-         return ANTLR_CHARSTREAM_EOF;
-     }
-     const ANTLR_UCHAR raw = (ANTLR_UCHAR)(*(stream->get_nextChar() + la - 1));
-     // Reverse the byte order: byte 0 <-> byte 3, byte 1 <-> byte 2
-     //
-     return (raw<<24) | ((raw>>8) & 0x0000FF00) | ((raw<<8) & 0x00FF0000) | (raw>>24);
- }
- /// Consume one 32 bit character from the stream.
- ///
- /// Does nothing when the next-character pointer has reached the end of the
- /// buffer. Otherwise the column counter is incremented, a character equal to
- /// the stream's newline character starts a new line (line counter up, column
- /// reset to 0, current-line pointer set just past it), and the pointer is
- /// advanced by one element.
- ///
- template<class ImplTraits, class SuperType>
- void UTF32_IntStream<ImplTraits, SuperType>::consume()
- {
-     SuperType* stream = this->get_super();
-     // SizeBuf is always in bytes, so a quarter of it is the element count
-     //
-     if ( stream->get_nextChar() >= (stream->get_data() + stream->get_sizeBuf()/4 ))
-     {
-         return;
-     }
-     // One more character on the current line
-     //
-     stream->inc_charPositionInLine();
-     if ((ANTLR_UCHAR)(*(stream->get_nextChar())) == stream->get_newlineChar())
-     {
-         // The character just seen ends the line; the next one starts a new line
-         //
-         stream->inc_line();
-         stream->set_charPositionInLine(0);
-         stream->set_currentLine( stream->get_nextChar() + 1 );
-     }
-     // Step over the character
-     //
-     stream->set_nextChar( stream->get_nextChar() + 1 );
- }
- /// Configure the stream for UTF-8 input by recording a character byte size of
- /// 0 on the owning stream (presumably meaning "variable width" - confirm
- /// against the users of charByteSize). Both byte-order arguments are ignored.
- ///
- template<class ImplTraits, class SuperType>
- void UTF8_IntStream<ImplTraits, SuperType>::setupIntStream(bool, bool)
- {
-     this->get_super()->set_charByteSize(0);
- }
- // ------------------------------------------------------
- // Following is from Unicode.org (see antlr3convertutf.c)
- //
- /// Lookup table keyed by the first byte of a UTF-8 sequence; the entry is the
- /// number of trailing bytes that are expected to follow that byte.
- /// Legal UTF-8 never has 4 or 5 trailing bytes; those entries are kept for
- /// anyone who needs to handle the longer forms allowed by earlier algorithms.
- ///
- /// \return Pointer to the first of 256 entries.
- ///
- template<class ImplTraits, class SuperType>
- const ANTLR_UINT32* UTF8_IntStream<ImplTraits, SuperType>::TrailingBytesForUTF8()
- {
-     // One row per 32 lead-byte values: rows 0-5 cover 0x00-0xBF, row 6 covers
-     // 0xC0-0xDF and row 7 covers 0xE0-0xFF.
-     //
-     static const ANTLR_UINT32 table[256] = {
-         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-         2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
-     };
-     return table;
- }
- /// Correction constants for UTF-8 decoding, indexed by the number of trailing
- /// bytes (0 to 5). The decoder sums the raw bytes of a sequence with shifts
- /// and then subtracts the entry for that sequence length.
- ///
- /// \return Pointer to the first of 6 entries.
- ///
- template<class ImplTraits, class SuperType>
- const UTF32* UTF8_IntStream<ImplTraits, SuperType>::OffsetsFromUTF8()
- {
-     static const UTF32 offsets[6] =
-     {
-         0x00000000UL,   // no trailing bytes
-         0x00003080UL,   // 1 trailing byte
-         0x000E2080UL,   // 2 trailing bytes
-         0x03C82080UL,   // 3 trailing bytes
-         0xFA082080UL,   // 4 trailing bytes
-         0x82082080UL    // 5 trailing bytes
-     };
-     return offsets;
- }
- // End of Unicode.org tables
- // -------------------------
- /** \brief Consume the next character in a UTF8 input stream
-  *
-  * Does nothing when the next-character pointer is already at the end of the
-  * buffer. Otherwise the column counter is incremented and the whole encoded
-  * sequence (lead byte plus trailing bytes) is stepped over. If the sequence
-  * would run to or past the end of the buffer, the pointer is parked at the
-  * end of the buffer instead. A decoded newline character starts a new line.
-  */
- template<class ImplTraits, class SuperType>
- void UTF8_IntStream<ImplTraits, SuperType>::consume()
- {
-     SuperType* input = this->get_super();
-     const ANTLR_UINT32* trailingTable = UTF8_IntStream::TrailingBytesForUTF8();
-     const UTF32* offsetTable = UTF8_IntStream::OffsetsFromUTF8();
-     ANTLR_UINT8* cursor = input->get_nextChar();
-     if (cursor >= (input->get_data() + input->get_sizeBuf()))
-     {
-         return;
-     }
-     // Indicate one more character in this line
-     //
-     input->inc_charPositionInLine();
-     // How many bytes follow the lead byte?
-     //
-     const ANTLR_UINT32 trailing = trailingTable[*cursor];
-     if ((cursor + trailing) >= (input->get_data() + input->get_sizeBuf()))
-     {
-         // Sequence does not fit in what is left of the buffer
-         //
-         input->set_nextChar( input->get_data() + input->get_sizeBuf() );
-         return;
-     }
-     // Accumulate every byte but the last with a 6 bit shift, then add the
-     // last byte unshifted. Up to 5 trailing bytes are accepted because the
-     // table still carries the old 5 and 6 byte forms.
-     //
-     ANTLR_UCHAR codePoint = 0;
-     for (ANTLR_UINT32 remaining = trailing; remaining > 0; --remaining)
-     {
-         codePoint += *cursor++;
-         codePoint <<= 6;
-     }
-     codePoint += *cursor++;
-     // Remove the accumulated lead/continuation marker bits
-     //
-     codePoint -= offsetTable[trailing];
-     if (codePoint == input->get_newlineChar())
-     {
-         // Reset for start of a new line of input
-         //
-         input->inc_line();
-         input->set_charPositionInLine(0);
-         input->set_currentLine(cursor);
-     }
-     // Update input pointer
-     //
-     input->set_nextChar(cursor);
- }
- /** \brief Return the input element assuming a UTF8 input
-  *
-  * \param[in] la 1 based offset of the wanted input element. 0 is treated
-  *               like 1; a negative value looks backwards that many characters
-  *               (stopping at the start of the buffer).
-  *
-  * \return The decoded character, or ANTLR_CHARSTREAM_EOF when the requested
-  *         position is at or beyond the end of the buffer or the encoded
-  *         sequence there does not fit in the buffer.
-  */
- template<class ImplTraits, class SuperType>
- ANTLR_UCHAR UTF8_IntStream<ImplTraits, SuperType>::LA(ANTLR_INT32 la)
- {
-     SuperType* input = this->get_super();
-     const ANTLR_UINT32* trailingBytesForUTF8 = UTF8_IntStream::TrailingBytesForUTF8();
-     const UTF32* offsetsFromUTF8 = UTF8_IntStream::OffsetsFromUTF8();
-     ANTLR_UINT32 extraBytesToRead;
-     ANTLR_UCHAR ch;
-     ANTLR_UINT8* nextChar;
-     nextChar = input->get_nextChar();
-     // Do we need to traverse forwards or backwards?
-     // - LA(0) is treated as LA(1) and we assume that the nextChar is
-     //   already positioned.
-     // - LA(n) ; n>1 means we must traverse forward n-1 characters catering for UTF8 encoding
-     // - LA(-n) means we must traverse backwards n characters
-     //
-     if (la > 1)
-     {
-         // Make sure that we have at least one character left before trying to
-         // loop through the buffer.
-         //
-         if (nextChar >= (input->get_data() + input->get_sizeBuf()))
-         {
-             return ANTLR_CHARSTREAM_EOF;
-         }
-         // Now traverse n-1 characters forward
-         //
-         while (--la > 0)
-         {
-             // Skip the lead byte plus however many trailing bytes it announces
-             //
-             nextChar += trailingBytesForUTF8[*nextChar] + 1;
-             // Does that calculation take us past the byte length of the buffer?
-             //
-             if (nextChar >= (input->get_data() + input->get_sizeBuf()))
-             {
-                 return ANTLR_CHARSTREAM_EOF;
-             }
-         }
-     }
-     else
-     {
-         // LA is zero or negative; for negative values step back that many characters
-         //
-         while (nextChar > input->get_data() && la++ < 0)
-         {
-             // Traversing backwards in UTF8 means decrementing by one, then
-             // continuing to decrement while the byte is a trailing byte of an
-             // encoded code point (trailing bytes always start with binary 10).
-             // The scan never moves before the start of the buffer, so malformed
-             // input cannot cause a read in front of it.
-             //
-             nextChar--;
-             while (nextChar > input->get_data() && (*nextChar & 0xC0) == 0x80)
-             {
-                 nextChar--;
-             }
-         }
-     }
-     // nextChar is now pointing at the UTF8 encoded character that we need to
-     // decode and return. It may sit exactly at the end of the buffer (LA(1) at
-     // end of input), in which case there is nothing to read.
-     //
-     if (nextChar >= (input->get_data() + input->get_sizeBuf()))
-     {
-         return ANTLR_CHARSTREAM_EOF;
-     }
-     // Are there more bytes needed to make up the whole thing?
-     //
-     extraBytesToRead = trailingBytesForUTF8[*nextChar];
-     if (nextChar + extraBytesToRead >= (input->get_data() + input->get_sizeBuf()))
-     {
-         return ANTLR_CHARSTREAM_EOF;
-     }
-     // Cases deliberately fall through (see note A in antlrconvertutf.c)
-     //
-     ch = 0;
-     switch (extraBytesToRead)
-     {
-         case 5: ch += *nextChar++; ch <<= 6;    // fall through
-         case 4: ch += *nextChar++; ch <<= 6;    // fall through
-         case 3: ch += *nextChar++; ch <<= 6;    // fall through
-         case 2: ch += *nextChar++; ch <<= 6;    // fall through
-         case 1: ch += *nextChar++; ch <<= 6;    // fall through
-         case 0: ch += *nextChar++;
-     }
-     // Remove the accumulated lead/continuation marker bits
-     //
-     ch -= offsetsFromUTF8[extraBytesToRead];
-     return ch;
- }
- /// Construct the stream with an empty (zero) cached size.
- ///
- template<class ImplTraits>
- TokenIntStream<ImplTraits>::TokenIntStream()
-     : m_cachedSize(0)
- {
- }
- /// Return the stored cached size value.
- ///
- template<class ImplTraits>
- ANTLR_UINT32 TokenIntStream<ImplTraits>::get_cachedSize() const
- {
-     return this->m_cachedSize;
- }
- /// Store a new cached size value.
- ///
- template<class ImplTraits>
- void TokenIntStream<ImplTraits>::set_cachedSize( ANTLR_UINT32 cachedSize )
- {
-     this->m_cachedSize = cachedSize;
- }
- /** Advance the token stream to the next token.
-  *
-  * When the current position is below the cached size, the position is
-  * incremented and then replaced by whatever skipOffTokenChannels() returns
-  * for it, so that tokens on channels the parser is not listening to are
-  * walked past. At or beyond the cached size nothing happens.
-  */
- template<class ImplTraits>
- void TokenIntStream<ImplTraits>::consume()
- {
-     TokenStreamType* stream = static_cast<TokenStreamType*>(this);
-     if ((ANTLR_UINT32)stream->get_p() >= m_cachedSize)
-     {
-         return;
-     }
-     stream->inc_p();
-     stream->set_p( stream->skipOffTokenChannels(stream->get_p()) );
- }
- /// Report every token in front of the current index to the debugger via
- /// consumeHiddenToken(), then clear the stream's initial-state flag.
- ///
- template<class ImplTraits>
- void TokenIntStream<ImplTraits>::consumeInitialHiddenTokens()
- {
-     TokenStreamType* stream = this->get_super();
-     const ANTLR_MARKER first = this->index();
-     ANTLR_INT32 pos = 0;
-     while (pos < first)
-     {
-         stream->get_debugger()->consumeHiddenToken(stream->get(pos));
-         pos++;
-     }
-     stream->set_initialStreamState(false);
- }
- /// Return the type of the token that LT(i) yields, or
- /// CommonTokenType::TOKEN_INVALID when LT(i) yields no token.
- ///
- template<class ImplTraits>
- ANTLR_UINT32 TokenIntStream<ImplTraits>::LA( ANTLR_INT32 i )
- {
-     TokenStreamType* stream = static_cast<TokenStreamType*>(this);
-     const CommonTokenType* lookahead = stream->LT(i);
-     if (lookahead == NULL)
-     {
-         return CommonTokenType::TOKEN_INVALID;
-     }
-     return lookahead->get_type();
- }
- /// Remember the current index as the last marker and return it.
- ///
- template<class ImplTraits>
- ANTLR_MARKER TokenIntStream<ImplTraits>::mark()
- {
-     return ( BaseType::m_lastMarker = this->index() );
- }
- /// Return the number of tokens, using the cached size when it is non-zero.
- /// A zero cache is refreshed from the size of the token container first.
- ///
- template<class ImplTraits>
- ANTLR_UINT32 TokenIntStream<ImplTraits>::size()
- {
-     if (this->get_cachedSize() == 0)
-     {
-         TokenStreamType* stream = this->get_super();
-         this->set_cachedSize( static_cast<ANTLR_UINT32>(stream->get_tokens().size()) );
-     }
-     return this->get_cachedSize();
- }
- /// Intentionally a no-op for token streams.
- ///
- template<class ImplTraits>
- void TokenIntStream<ImplTraits>::release()
- {
- }
- /// Return the owning token stream's current position (its p value).
- ///
- template<class ImplTraits>
- ANTLR_MARKER TokenIntStream<ImplTraits>::tindex()
- {
-     TokenStreamType* stream = this->get_super();
-     return stream->get_p();
- }
- /// Rewind the stream to the most recently recorded marker.
- ///
- template<class ImplTraits>
- void TokenIntStream<ImplTraits>::rewindLast()
- {
-     const ANTLR_MARKER last = this->get_lastMarker();
-     this->rewind( last );
- }
- /// Rewind to the given marker; for a token stream this is a plain seek.
- ///
- template<class ImplTraits>
- void TokenIntStream<ImplTraits>::rewind(ANTLR_MARKER marker)
- {
-     this->seek(marker);
- }
- /// Set the token stream's position to index (converted to ANTLR_INT32).
- ///
- template<class ImplTraits>
- void TokenIntStream<ImplTraits>::seek(ANTLR_MARKER index)
- {
-     const ANTLR_INT32 target = static_cast<ANTLR_INT32>(index);
-     static_cast<TokenStreamType*>(this)->set_p( target );
- }
- /// Return the name associated with the input source.
- ///
- /// A token stream has no source name of its own, so the name is obtained
- /// from the token source that feeds it.
- ///
- /// \returns The file name reported by the stream's token source.
- ///
- template<class ImplTraits>
- typename TokenIntStream<ImplTraits>::StringType
- TokenIntStream<ImplTraits>::getSourceName()
- {
-     // Go through the owning token stream to its token source, which is the
-     // object that knows the file name.
-     //
-     TokenStreamType* stream = this->get_super();
-     return stream->get_tokenSource()->get_fileName();
- }
- /// Advance the tree node stream by one position. A position of -1 triggers
- /// fillBufferRoot() before the position is incremented.
- ///
- template<class ImplTraits>
- void TreeNodeIntStream<ImplTraits>::consume()
- {
-     TreeNodeStreamType* stream = this->get_super();
-     if (stream->get_p() == -1)
-     {
-         stream->fillBufferRoot();
-     }
-     stream->inc_p();
- }
- /// Return the owning tree node stream's current position as a marker.
- ///
- template<class ImplTraits>
- ANTLR_MARKER TreeNodeIntStream<ImplTraits>::tindex()
- {
-     return (ANTLR_MARKER)( this->get_super()->get_p() );
- }
- /// Return the type of the node that LT(i) yields, or
- /// CommonTokenType::TOKEN_INVALID when LT(i) yields no node.
- ///
- template<class ImplTraits>
- ANTLR_UINT32 TreeNodeIntStream<ImplTraits>::LA(ANTLR_INT32 i)
- {
-     TreeNodeStreamType* stream = this->get_super();
-     // LT does the actual lookup; LA only reports the type
-     //
-     TreeTypePtr node = stream->LT(i);
-     if (node == NULL)
-     {
-         return CommonTokenType::TOKEN_INVALID;
-     }
-     return node->get_type();
- }
- /// Record the current index as the last marker and return it. A position of
- /// -1 triggers fillBufferRoot() first.
- ///
- template<class ImplTraits>
- ANTLR_MARKER TreeNodeIntStream<ImplTraits>::mark()
- {
-     TreeNodeStreamType* stream = this->get_super();
-     if (stream->get_p() == -1)
-     {
-         stream->fillBufferRoot();
-     }
-     // Store the mark point, then hand back what was stored
-     //
-     this->set_lastMarker( this->index() );
-     return this->get_lastMarker();
- }
- /// Intentionally a no-op; the marker argument is ignored.
- ///
- template<class ImplTraits>
- void TreeNodeIntStream<ImplTraits>::release(ANTLR_MARKER /*marker*/)
- {
-     // Nothing to release
- }
- /// Rewind to the given marker; implemented as a plain seek.
- ///
- template<class ImplTraits>
- void TreeNodeIntStream<ImplTraits>::rewindMark(ANTLR_MARKER marker)
- {
-     // A marker is just a position, so seeking to it is all that is needed
-     //
-     this->seek( marker );
- }
- /// Rewind to the most recently recorded marker; implemented as a seek.
- ///
- template<class ImplTraits>
- void TreeNodeIntStream<ImplTraits>::rewindLast()
- {
-     // Seek straight to the stored marker
-     //
-     this->seek( this->get_lastMarker() );
- }
- /// Set the tree node stream's position to index, converted with
- /// ANTLR_UINT32_CAST.
- ///
- template<class ImplTraits>
- void TreeNodeIntStream<ImplTraits>::seek(ANTLR_MARKER index)
- {
-     this->get_super()->set_p( ANTLR_UINT32_CAST(index) );
- }
- /// Return the number of nodes held by the tree node stream. A position of -1
- /// triggers fillBufferRoot() before the node container is measured.
- ///
- template<class ImplTraits>
- ANTLR_UINT32 TreeNodeIntStream<ImplTraits>::size()
- {
-     TreeNodeStreamType* stream = this->get_super();
-     if (stream->get_p() == -1)
-     {
-         stream->fillBufferRoot();
-     }
-     // Container size is converted to the 32 bit return type explicitly
-     //
-     return static_cast<ANTLR_UINT32>( stream->get_nodes().size() );
- }
- }
|