antlr3lexer.inl 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592
  1. namespace antlr3 {
  2. template<class ImplTraits>
  3. Lexer<ImplTraits>::Lexer(ANTLR_UINT32 sizeHint, RecognizerSharedStateType* state)
  4. :Lexer<ImplTraits>::RecognizerType(sizeHint, state)
  5. ,m_input(NULL)
  6. {
  7. }
  8. template<class ImplTraits>
  9. Lexer<ImplTraits>::Lexer(ANTLR_UINT32 sizeHint, InputStreamType* input, RecognizerSharedStateType* state)
  10. :Lexer<ImplTraits>::RecognizerType(sizeHint, state)
  11. {
  12. this->setCharStream(input);
  13. }
  14. template<class ImplTraits>
  15. typename Lexer<ImplTraits>::InputStreamType* Lexer<ImplTraits>::get_input() const
  16. {
  17. return m_input;
  18. }
  19. template<class ImplTraits>
  20. typename Lexer<ImplTraits>::IntStreamType* Lexer<ImplTraits>::get_istream() const
  21. {
  22. return m_input;
  23. }
  24. template<class ImplTraits>
  25. typename Lexer<ImplTraits>::RecognizerType* Lexer<ImplTraits>::get_rec()
  26. {
  27. return this;
  28. }
  29. template<class ImplTraits>
  30. typename Lexer<ImplTraits>::TokenSourceType* Lexer<ImplTraits>::get_tokSource()
  31. {
  32. return this;
  33. }
  34. template<class ImplTraits>
  35. void Lexer<ImplTraits>::displayRecognitionError( ANTLR_UINT8** , ExceptionBaseType* ex)
  36. {
  37. StringStreamType err_stream;
  38. // See if there is a 'filename' we can use
  39. //
  40. if( ex->getName().empty() )
  41. {
  42. err_stream << "-unknown source-(";
  43. }
  44. else
  45. {
  46. err_stream << ex->get_streamName().c_str();
  47. err_stream << "(";
  48. }
  49. err_stream << ex->get_line() << ")";
  50. err_stream << ": lexer error " << ex->getName() << '(' << ex->getType() << ')' << " :\n\t"
  51. << ex->get_message() << " at position [" << ex->get_line() << ", "
  52. << ex->get_charPositionInLine()+1 << "], ";
  53. {
  54. ANTLR_UINT32 width;
  55. width = ANTLR_UINT32_CAST(( (ANTLR_UINT8*)(m_input->get_data()) +
  56. (m_input->size() )) - (ANTLR_UINT8*)( ex->get_index() ));
  57. if (width >= 1)
  58. {
  59. if (isprint(ex->get_c() ))
  60. {
  61. err_stream << "near '" << (typename StringType::value_type) ex->get_c() << "' :\n";
  62. }
  63. else
  64. {
  65. err_stream << "near char(" << std::hex << ex->get_c() << std::dec << ") :\n";
  66. }
  67. err_stream << "\t";
  68. err_stream.width( width > 20 ? 20 : width );
  69. err_stream << (typename StringType::const_pointer)ex->get_index() << "\n";
  70. }
  71. else
  72. {
  73. err_stream << "(end of input).\n\t This indicates a poorly specified lexer RULE\n\t or unterminated input element such as: \"STRING[\"]\n";
  74. err_stream << "\t The lexer was matching from line "
  75. << this->get_state()->get_tokenStartLine()
  76. << ", offset " << this->get_state()->get_tokenStartCharPositionInLine()
  77. << ", which\n\t ";
  78. width = ANTLR_UINT32_CAST(((ANTLR_UINT8*)(m_input->get_data() )+
  79. (m_input->size())) -
  80. (ANTLR_UINT8*)(this->get_state()->get_tokenStartCharIndex() ));
  81. if (width >= 1)
  82. {
  83. err_stream << "looks like this:\n\t\t";
  84. err_stream.width( width > 20 ? 20 : width );
  85. err_stream << (typename StringType::const_pointer)this->get_state()->get_tokenStartCharIndex() << "\n";
  86. }
  87. else
  88. {
  89. err_stream << "is also the end of the line, so you must check your lexer rules\n";
  90. }
  91. }
  92. }
  93. ImplTraits::displayRecognitionError( err_stream.str() );
  94. }
  95. template<class ImplTraits>
  96. void Lexer<ImplTraits>::fillExceptionData( ExceptionBaseType* ex )
  97. {
  98. ex->set_c( m_input->LA(1) ); /* Current input character */
  99. ex->set_line( m_input->get_line() ); /* Line number comes from stream */
  100. ex->set_charPositionInLine( m_input->get_charPositionInLine() ); /* Line offset also comes from the stream */
  101. ex->set_index( m_input->index() );
  102. ex->set_streamName( m_input->get_fileName() );
  103. ex->set_message( "Unexpected character" );
  104. }
  105. template<class ImplTraits>
  106. void Lexer<ImplTraits>::setCharStream(InputStreamType* input)
  107. {
  108. /* Install the input interface
  109. */
  110. m_input = input;
  111. /* Set the current token to nothing
  112. */
  113. RecognizerSharedStateType* state = this->get_rec()->get_state();
  114. state->set_token_present( false );
  115. state->set_text("");
  116. state->set_tokenStartCharIndex(-1);
  117. /* Copy the name of the char stream to the token source
  118. */
  119. this->get_tokSource()->set_fileName( input->get_fileName() );
  120. }
  121. template<class ImplTraits>
  122. void Lexer<ImplTraits>::pushCharStream(InputStreamType* input)
  123. {
  124. // We have a stack, so we can save the current input stream
  125. // into it.
  126. //
  127. this->get_istream()->mark();
  128. this->get_rec()->get_state()->get_streams().push(this->get_input());
  129. // And now we can install this new one
  130. //
  131. this->setCharStream(input);
  132. }
  133. template<class ImplTraits>
  134. void Lexer<ImplTraits>::popCharStream()
  135. {
  136. InputStreamType* input;
  137. // If we do not have a stream stack or we are already at the
  138. // stack bottom, then do nothing.
  139. //
  140. typename RecognizerSharedStateType::StreamsType& streams = this->get_rec()->get_state()->get_streams();
  141. if ( streams.size() > 0)
  142. {
  143. // We just leave the current stream to its fate, we do not close
  144. // it or anything as we do not know what the programmer intended
  145. // for it. This method can always be overridden of course.
  146. // So just find out what was currently saved on the stack and use
  147. // that now, then pop it from the stack.
  148. //
  149. input = streams.top();
  150. streams.pop();
  151. // Now install the stream as the current one.
  152. //
  153. this->setCharStream(input);
  154. this->get_istream()->rewindLast();
  155. }
  156. return;
  157. }
  158. template<class ImplTraits>
  159. void Lexer<ImplTraits>::emit(const CommonTokenType* token)
  160. {
  161. this->get_rec()->get_state()->set_token(token);
  162. }
  163. template<class ImplTraits>
  164. typename Lexer<ImplTraits>::CommonTokenType* Lexer<ImplTraits>::emit()
  165. {
  166. /* We could check pointers to token factories and so on, but
  167. * we are in code that we want to run as fast as possible
  168. * so we are not checking any errors. So make sure you have installed an input stream before
  169. * trying to emit a new token.
  170. */
  171. RecognizerSharedStateType* state = this->get_rec()->get_state();
  172. state->set_token_present(true);
  173. CommonTokenType* token = state->get_token();
  174. token->set_input( this->get_input() );
  175. /* Install the supplied information, and some other bits we already know
  176. * get added automatically, such as the input stream it is associated with
  177. * (though it can all be overridden of course)
  178. */
  179. token->set_type( state->get_type() );
  180. token->set_channel( state->get_channel() );
  181. token->set_startIndex( state->get_tokenStartCharIndex() );
  182. token->set_stopIndex( this->getCharIndex() - 1 );
  183. token->set_line( state->get_tokenStartLine() );
  184. token->set_charPositionInLine( state->get_tokenStartCharPositionInLine() );
  185. token->set_tokText( state->get_text() );
  186. token->set_lineStart( this->get_input()->get_currentLine() );
  187. return token;
  188. }
  189. template<class ImplTraits>
  190. Lexer<ImplTraits>::~Lexer()
  191. {
  192. // This may have ben a delegate or delegator lexer, in which case the
  193. // state may already have been freed (and set to NULL therefore)
  194. // so we ignore the state if we don't have it.
  195. //
  196. RecognizerSharedStateType* state = this->get_rec()->get_state();
  197. if ( state != NULL)
  198. {
  199. state->get_streams().clear();
  200. }
  201. }
  202. template<class ImplTraits>
  203. bool Lexer<ImplTraits>::matchs(ANTLR_UCHAR* str )
  204. {
  205. RecognizerSharedStateType* state = this->get_rec()->get_state();
  206. while (*str != ANTLR_STRING_TERMINATOR)
  207. {
  208. if ( this->get_istream()->LA(1) != (*str))
  209. {
  210. if ( state->get_backtracking() > 0)
  211. {
  212. state->set_failed(true);
  213. return false;
  214. }
  215. this->exConstruct();
  216. state->set_failed( true );
  217. /* TODO: Implement exception creation more fully perhaps
  218. */
  219. this->recover();
  220. return false;
  221. }
  222. /* Matched correctly, do consume it
  223. */
  224. this->get_istream()->consume();
  225. str++;
  226. }
  227. /* Reset any failed indicator
  228. */
  229. state->set_failed( false );
  230. return true;
  231. }
  232. template<class ImplTraits>
  233. bool Lexer<ImplTraits>::matchc(ANTLR_UCHAR c)
  234. {
  235. if (this->get_istream()->LA(1) == c)
  236. {
  237. /* Matched correctly, do consume it
  238. */
  239. this->get_istream()->consume();
  240. /* Reset any failed indicator
  241. */
  242. this->get_rec()->get_state()->set_failed( false );
  243. return true;
  244. }
  245. /* Failed to match, exception and recovery time.
  246. */
  247. if(this->get_rec()->get_state()->get_backtracking() > 0)
  248. {
  249. this->get_rec()->get_state()->set_failed( true );
  250. return false;
  251. }
  252. this->exConstruct();
  253. /* TODO: Implement exception creation more fully perhaps
  254. */
  255. this->recover();
  256. return false;
  257. }
  258. template<class ImplTraits>
  259. bool Lexer<ImplTraits>::matchRange(ANTLR_UCHAR low, ANTLR_UCHAR high)
  260. {
  261. ANTLR_UCHAR c;
  262. /* What is in the stream at the moment?
  263. */
  264. c = this->get_istream()->LA(1);
  265. if ( c >= low && c <= high)
  266. {
  267. /* Matched correctly, consume it
  268. */
  269. this->get_istream()->consume();
  270. /* Reset any failed indicator
  271. */
  272. this->get_rec()->get_state()->set_failed( false );
  273. return true;
  274. }
  275. /* Failed to match, execption and recovery time.
  276. */
  277. if (this->get_rec()->get_state()->get_backtracking() > 0)
  278. {
  279. this->get_rec()->get_state()->set_failed( true );
  280. return false;
  281. }
  282. this->exConstruct();
  283. /* TODO: Implement exception creation more fully
  284. */
  285. this->recover();
  286. return false;
  287. }
  288. template<class ImplTraits>
  289. void Lexer<ImplTraits>::matchAny()
  290. {
  291. this->get_istream()->consume();
  292. }
  293. template<class ImplTraits>
  294. void Lexer<ImplTraits>::recover()
  295. {
  296. this->get_istream()->consume();
  297. }
  298. template<class ImplTraits>
  299. ANTLR_UINT32 Lexer<ImplTraits>::getLine()
  300. {
  301. return this->get_input()->get_line();
  302. }
  303. template<class ImplTraits>
  304. ANTLR_MARKER Lexer<ImplTraits>::getCharIndex()
  305. {
  306. return this->get_istream()->index();
  307. }
  308. template<class ImplTraits>
  309. ANTLR_UINT32 Lexer<ImplTraits>::getCharPositionInLine()
  310. {
  311. return this->get_input()->get_charPositionInLine();
  312. }
  313. template<class ImplTraits>
  314. typename Lexer<ImplTraits>::StringType Lexer<ImplTraits>::getText()
  315. {
  316. RecognizerSharedStateType* state = this->get_rec()->get_state();
  317. if ( !state->get_text().empty() )
  318. {
  319. return state->get_text();
  320. }
  321. return this->get_input()->substr( state->get_tokenStartCharIndex(),
  322. this->getCharIndex() - this->get_input()->get_charByteSize()
  323. );
  324. }
  325. template<class ImplTraits>
  326. void Lexer<ImplTraits>::exConstruct()
  327. {
  328. new ANTLR_Exception<ImplTraits, RECOGNITION_EXCEPTION, InputStreamType>( this->get_rec(), "" );
  329. }
  330. template< class ImplTraits>
  331. typename Lexer<ImplTraits>::TokenType* Lexer<ImplTraits>::getMissingSymbol( IntStreamType*,
  332. ExceptionBaseType*,
  333. ANTLR_UINT32 , BitsetListType*)
  334. {
  335. return NULL;
  336. }
  337. template< class ImplTraits>
  338. ANTLR_INLINE const typename Lexer<ImplTraits>::RecognizerType* Lexer<ImplTraits>::get_rec() const
  339. {
  340. return this;
  341. }
  342. template< class ImplTraits>
  343. ANTLR_INLINE const typename Lexer<ImplTraits>::RecognizerType* Lexer<ImplTraits>::get_recognizer() const
  344. {
  345. return this->get_rec();
  346. }
  347. template< class ImplTraits>
  348. ANTLR_INLINE typename Lexer<ImplTraits>::RecognizerSharedStateType* Lexer<ImplTraits>::get_lexstate() const
  349. {
  350. return this->get_rec()->get_state();
  351. }
  352. template< class ImplTraits>
  353. ANTLR_INLINE void Lexer<ImplTraits>::set_lexstate( RecognizerSharedStateType* lexstate )
  354. {
  355. this->get_rec()->set_state(lexstate);
  356. }
  357. template< class ImplTraits>
  358. ANTLR_INLINE const typename Lexer<ImplTraits>::TokenSourceType* Lexer<ImplTraits>::get_tokSource() const
  359. {
  360. return this;
  361. }
  362. template< class ImplTraits>
  363. ANTLR_INLINE typename Lexer<ImplTraits>::CommonTokenType* Lexer<ImplTraits>::get_ltoken() const
  364. {
  365. return this->get_lexstate()->token();
  366. }
  367. template< class ImplTraits>
  368. ANTLR_INLINE void Lexer<ImplTraits>::set_ltoken( const CommonTokenType* ltoken )
  369. {
  370. this->get_lexstate()->set_token( ltoken );
  371. }
  372. template< class ImplTraits>
  373. ANTLR_INLINE bool Lexer<ImplTraits>::hasFailed() const
  374. {
  375. return this->get_lexstate()->get_failed();
  376. }
  377. template< class ImplTraits>
  378. ANTLR_INLINE ANTLR_INT32 Lexer<ImplTraits>::get_backtracking() const
  379. {
  380. return this->get_lexstate()->get_backtracking();
  381. }
  382. template< class ImplTraits>
  383. ANTLR_INLINE void Lexer<ImplTraits>::inc_backtracking()
  384. {
  385. this->get_lexstate()->inc_backtracking();
  386. }
  387. template< class ImplTraits>
  388. ANTLR_INLINE void Lexer<ImplTraits>::dec_backtracking()
  389. {
  390. this->get_lexstate()->dec_backtracking();
  391. }
  392. template< class ImplTraits>
  393. ANTLR_INLINE bool Lexer<ImplTraits>::get_failedflag() const
  394. {
  395. return this->get_lexstate()->get_failed();
  396. }
  397. template< class ImplTraits>
  398. ANTLR_INLINE void Lexer<ImplTraits>::set_failedflag( bool failed )
  399. {
  400. this->get_lexstate()->set_failed(failed);
  401. }
  402. template< class ImplTraits>
  403. ANTLR_INLINE typename Lexer<ImplTraits>::InputStreamType* Lexer<ImplTraits>::get_strstream() const
  404. {
  405. return this->get_input();
  406. }
  407. template< class ImplTraits>
  408. ANTLR_INLINE ANTLR_MARKER Lexer<ImplTraits>::index() const
  409. {
  410. return this->get_istream()->index();
  411. }
  412. template< class ImplTraits>
  413. ANTLR_INLINE void Lexer<ImplTraits>::seek(ANTLR_MARKER index)
  414. {
  415. this->get_istream()->seek(index);
  416. }
  417. template< class ImplTraits>
  418. ANTLR_INLINE const typename Lexer<ImplTraits>::CommonTokenType* Lexer<ImplTraits>::EOF_Token() const
  419. {
  420. const CommonTokenType& eof_token = this->get_tokSource()->get_eofToken();
  421. return &eof_token;
  422. }
  423. template< class ImplTraits>
  424. ANTLR_INLINE bool Lexer<ImplTraits>::hasException() const
  425. {
  426. return this->get_lexstate()->get_error();
  427. }
  428. template< class ImplTraits>
  429. ANTLR_INLINE typename Lexer<ImplTraits>::ExceptionBaseType* Lexer<ImplTraits>::get_exception() const
  430. {
  431. return this->get_lexstate()->get_exception();
  432. }
  433. template< class ImplTraits>
  434. ANTLR_INLINE void Lexer<ImplTraits>::constructEx()
  435. {
  436. this->get_rec()->exConstruct();
  437. }
  438. template< class ImplTraits>
  439. ANTLR_INLINE ANTLR_MARKER Lexer<ImplTraits>::mark()
  440. {
  441. return this->get_istream()->mark();
  442. }
  443. template< class ImplTraits>
  444. ANTLR_INLINE void Lexer<ImplTraits>::rewind(ANTLR_MARKER marker)
  445. {
  446. this->get_istream()->rewind(marker);
  447. }
  448. template< class ImplTraits>
  449. ANTLR_INLINE void Lexer<ImplTraits>::rewindLast()
  450. {
  451. this->get_istream()->rewindLast();
  452. }
  453. template< class ImplTraits>
  454. ANTLR_INLINE void Lexer<ImplTraits>::memoize(ANTLR_MARKER ruleIndex, ANTLR_MARKER ruleParseStart)
  455. {
  456. this->get_rec()->memoize( ruleIndex, ruleParseStart );
  457. }
  458. template< class ImplTraits>
  459. ANTLR_INLINE bool Lexer<ImplTraits>::haveParsedRule(ANTLR_MARKER ruleIndex)
  460. {
  461. return this->get_rec()->alreadyParsedRule(ruleIndex);
  462. }
  463. template< class ImplTraits>
  464. ANTLR_INLINE void Lexer<ImplTraits>::setText( const StringType& text )
  465. {
  466. this->get_lexstate()->set_text(text);
  467. }
  468. template< class ImplTraits>
  469. ANTLR_INLINE void Lexer<ImplTraits>::skip()
  470. {
  471. CommonTokenType& skipToken = this->get_tokSource()->get_skipToken();
  472. this->get_lexstate()->set_token( &skipToken );
  473. }
  474. template< class ImplTraits>
  475. ANTLR_INLINE typename Lexer<ImplTraits>::RuleMemoType* Lexer<ImplTraits>::getRuleMemo() const
  476. {
  477. return this->get_lexstate()->get_rulememo();
  478. }
  479. template< class ImplTraits>
  480. ANTLR_INLINE void Lexer<ImplTraits>::setRuleMemo(RuleMemoType* rulememo)
  481. {
  482. return this->get_lexstate()->set_rulememo(rulememo);
  483. }
  484. template< class ImplTraits>
  485. ANTLR_INLINE typename Lexer<ImplTraits>::DebuggerType* Lexer<ImplTraits>::get_debugger() const
  486. {
  487. return this->get_rec()->get_debugger();
  488. }
  489. template< class ImplTraits>
  490. ANTLR_INLINE ANTLR_UINT32 Lexer<ImplTraits>::LA(ANTLR_INT32 i)
  491. {
  492. return this->get_istream()->LA(i);
  493. }
  494. template< class ImplTraits>
  495. ANTLR_INLINE void Lexer<ImplTraits>::consume()
  496. {
  497. return this->get_istream()->consume();
  498. }
  499. }