antlr3intstream.inl 50 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661
  1. namespace antlr3 {
  2. template<class ImplTraits, class SuperType>
  3. ANTLR_INLINE IntStream<ImplTraits, SuperType>::IntStream()
  4. {
  5. m_lastMarker = 0;
  6. m_upper_case = false;
  7. }
  8. template<class ImplTraits, class SuperType>
  9. ANTLR_INLINE typename IntStream<ImplTraits, SuperType>::StringType IntStream<ImplTraits, SuperType>::getSourceName()
  10. {
  11. return m_streamName;
  12. }
  13. template<class ImplTraits, class SuperType>
  14. ANTLR_INLINE typename IntStream<ImplTraits, SuperType>::StringType& IntStream<ImplTraits, SuperType>::get_streamName()
  15. {
  16. return m_streamName;
  17. }
  18. template<class ImplTraits, class SuperType>
  19. ANTLR_INLINE const typename IntStream<ImplTraits, SuperType>::StringType& IntStream<ImplTraits, SuperType>::get_streamName() const
  20. {
  21. return m_streamName;
  22. }
  23. template<class ImplTraits, class SuperType>
  24. ANTLR_INLINE ANTLR_MARKER IntStream<ImplTraits, SuperType>::get_lastMarker() const
  25. {
  26. return m_lastMarker;
  27. }
  28. template<class ImplTraits, class SuperType>
  29. ANTLR_INLINE void IntStream<ImplTraits, SuperType>::setUcaseLA(bool flag)
  30. {
  31. m_upper_case = flag;
  32. }
  33. template<class ImplTraits, class SuperType>
  34. ANTLR_INLINE SuperType* IntStream<ImplTraits, SuperType>::get_super()
  35. {
  36. return static_cast<SuperType*>(this);
  37. }
  38. template<class ImplTraits, class SuperType>
  39. void IntStream<ImplTraits, SuperType>::consume()
  40. {
  41. SuperType* input = this->get_super();
  42. const ANTLR_UINT8* nextChar = input->get_nextChar();
  43. const ANTLR_UINT8* data = input->get_data();
  44. ANTLR_UINT32 sizeBuf = input->get_sizeBuf();
  45. if ( nextChar < ( data + sizeBuf ) )
  46. {
  47. /* Indicate one more character in this line
  48. */
  49. input->inc_charPositionInLine();
  50. if ((ANTLR_UCHAR)(*(nextChar)) == input->get_newlineChar() )
  51. {
  52. /* Reset for start of a new line of input
  53. */
  54. input->inc_line();
  55. input->set_charPositionInLine(0);
  56. input->set_currentLine(nextChar + 1);
  57. }
  58. /* Increment to next character position
  59. */
  60. input->set_nextChar( nextChar + 1 );
  61. }
  62. }
  63. template<class ImplTraits, class SuperType>
  64. ANTLR_UINT32 IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la )
  65. {
  66. SuperType* input = this->get_super();
  67. const ANTLR_UINT8* nextChar = input->get_nextChar();
  68. const ANTLR_UINT8* data = input->get_data();
  69. ANTLR_UINT32 sizeBuf = input->get_sizeBuf();
  70. if (( nextChar + la - 1) >= (data + sizeBuf))
  71. {
  72. return ANTLR_CHARSTREAM_EOF;
  73. }
  74. else
  75. {
  76. if( !m_upper_case )
  77. return (ANTLR_UCHAR)(*(nextChar + la - 1));
  78. else
  79. return (ANTLR_UCHAR)toupper(*(nextChar + la - 1));
  80. }
  81. }
  82. template<class ImplTraits, class SuperType>
  83. ANTLR_MARKER IntStream<ImplTraits, SuperType>::mark()
  84. {
  85. LexState<ImplTraits>* state;
  86. SuperType* input = this->get_super();
  87. /* New mark point
  88. */
  89. input->inc_markDepth();
  90. /* See if we are revisiting a mark as we can just reuse the vector
  91. * entry if we are, otherwise, we need a new one
  92. */
  93. if (input->get_markDepth() > input->get_markers().size() )
  94. {
  95. input->get_markers().push_back( LexState<ImplTraits>() );
  96. LexState<ImplTraits>& state_r = input->get_markers().back();
  97. state = &state_r;
  98. }
  99. else
  100. {
  101. LexState<ImplTraits>& state_r = input->get_markers().at( input->get_markDepth() - 1 );
  102. state = &state_r;
  103. /* Assume no errors for speed, it will just blow up if the table failed
  104. * for some reasons, hence lots of unit tests on the tables ;-)
  105. */
  106. }
  107. /* We have created or retrieved the state, so update it with the current
  108. * elements of the lexer state.
  109. */
  110. state->set_charPositionInLine( input->get_charPositionInLine() );
  111. state->set_currentLine( input->get_currentLine() );
  112. state->set_line( input->get_line() );
  113. state->set_nextChar( input->get_nextChar() );
  114. m_lastMarker = input->get_markDepth();
  115. /* And that's it
  116. */
  117. return input->get_markDepth();
  118. }
  119. template<class ImplTraits, class SuperType>
  120. ANTLR_MARKER IntStream<ImplTraits, SuperType>::index()
  121. {
  122. SuperType* input = this->get_super();
  123. return input->index_impl();
  124. }
  125. template<class ImplTraits, class SuperType>
  126. void IntStream<ImplTraits, SuperType>::rewind(ANTLR_MARKER mark)
  127. {
  128. SuperType* input = this->get_super();
  129. /* Perform any clean up of the marks
  130. */
  131. this->release(mark);
  132. /* Find the supplied mark state
  133. */
  134. ANTLR_UINT32 idx = static_cast<ANTLR_UINT32>( mark-1 );
  135. typename ImplTraits::LexStateType& state = input->get_markers().at( idx );
  136. /* Seek input pointer to the requested point (note we supply the void *pointer
  137. * to whatever is implementing the int stream to seek).
  138. */
  139. this->seek( (ANTLR_MARKER)state.get_nextChar() );
  140. /* Reset to the reset of the information in the mark
  141. */
  142. input->set_charPositionInLine( state.get_charPositionInLine() );
  143. input->set_currentLine( state.get_currentLine() );
  144. input->set_line( state.get_line() );
  145. input->set_nextChar( state.get_nextChar() );
  146. /* And we are done
  147. */
  148. }
  149. template<class ImplTraits, class SuperType>
  150. void IntStream<ImplTraits, SuperType>::rewindLast()
  151. {
  152. this->rewind(m_lastMarker);
  153. }
  154. template<class ImplTraits, class SuperType>
  155. void IntStream<ImplTraits, SuperType>::release(ANTLR_MARKER mark)
  156. {
  157. SuperType* input = this->get_super();
  158. /* We don't do much here in fact as we never free any higher marks in
  159. * the hashtable as we just resuse any memory allocated for them.
  160. */
  161. input->set_markDepth( (ANTLR_UINT32)(mark - 1) );
  162. }
  163. template<class ImplTraits, class SuperType>
  164. void IntStream<ImplTraits, SuperType>::setupIntStream(bool, bool)
  165. {
  166. }
  167. template<class ImplTraits, class SuperType>
  168. void IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint)
  169. {
  170. ANTLR_INT32 count;
  171. SuperType* input = this->get_super();
  172. ANTLR_MARKER nextChar = (ANTLR_MARKER) input->get_nextChar();
  173. /* If the requested seek point is less than the current
  174. * input point, then we assume that we are resetting from a mark
  175. * and do not need to scan, but can just set to there.
  176. */
  177. if (seekPoint <= nextChar)
  178. {
  179. input->set_nextChar((ANTLR_UINT8*) seekPoint);
  180. }
  181. else
  182. {
  183. count = (ANTLR_UINT32)(seekPoint - nextChar);
  184. while (count--)
  185. {
  186. this->consume();
  187. }
  188. }
  189. }
  190. template<class ImplTraits, class SuperType>
  191. IntStream<ImplTraits, SuperType>::~IntStream()
  192. {
  193. }
  194. template<class ImplTraits, class SuperType>
  195. ANTLR_UINT32 EBCDIC_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la)
  196. {
  197. // EBCDIC to ASCII conversion table
  198. //
  199. // This for EBCDIC EDF04 translated to ISO-8859.1 which is the usually accepted POSIX
  200. // translation and the character tables are published all over the interweb.
  201. //
  202. const ANTLR_UCHAR e2a[256] =
  203. {
  204. 0x00, 0x01, 0x02, 0x03, 0x85, 0x09, 0x86, 0x7f,
  205. 0x87, 0x8d, 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  206. 0x10, 0x11, 0x12, 0x13, 0x8f, 0x0a, 0x08, 0x97,
  207. 0x18, 0x19, 0x9c, 0x9d, 0x1c, 0x1d, 0x1e, 0x1f,
  208. 0x80, 0x81, 0x82, 0x83, 0x84, 0x92, 0x17, 0x1b,
  209. 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07,
  210. 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04,
  211. 0x98, 0x99, 0x9a, 0x9b, 0x14, 0x15, 0x9e, 0x1a,
  212. 0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1, 0xe3, 0xe5,
  213. 0xe7, 0xf1, 0x60, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
  214. 0x26, 0xe9, 0xea, 0xeb, 0xe8, 0xed, 0xee, 0xef,
  215. 0xec, 0xdf, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x9f,
  216. 0x2d, 0x2f, 0xc2, 0xc4, 0xc0, 0xc1, 0xc3, 0xc5,
  217. 0xc7, 0xd1, 0x5e, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
  218. 0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf,
  219. 0xcc, 0xa8, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
  220. 0xd8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  221. 0x68, 0x69, 0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1,
  222. 0xb0, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70,
  223. 0x71, 0x72, 0xaa, 0xba, 0xe6, 0xb8, 0xc6, 0xa4,
  224. 0xb5, 0xaf, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
  225. 0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0xdd, 0xde, 0xae,
  226. 0xa2, 0xa3, 0xa5, 0xb7, 0xa9, 0xa7, 0xb6, 0xbc,
  227. 0xbd, 0xbe, 0xac, 0x5b, 0x5c, 0x5d, 0xb4, 0xd7,
  228. 0xf9, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
  229. 0x48, 0x49, 0xad, 0xf4, 0xf6, 0xf2, 0xf3, 0xf5,
  230. 0xa6, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
  231. 0x51, 0x52, 0xb9, 0xfb, 0xfc, 0xdb, 0xfa, 0xff,
  232. 0xd9, 0xf7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
  233. 0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2, 0xd3, 0xd5,
  234. 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
  235. 0x38, 0x39, 0xb3, 0x7b, 0xdc, 0x7d, 0xda, 0x7e
  236. };
  237. SuperType* input = this->get_super();
  238. if (( input->get_nextChar() + la - 1) >= ( input->get_data() + input->get_sizeBuf() ))
  239. {
  240. return ANTLR_CHARSTREAM_EOF;
  241. }
  242. else
  243. {
  244. // Translate the required character via the constant conversion table
  245. //
  246. return e2a[(*(input->get_nextChar() + la - 1))];
  247. }
  248. }
  249. template<class ImplTraits, class SuperType>
  250. void EBCDIC_IntStream<ImplTraits, SuperType>::setupIntStream()
  251. {
  252. SuperType* super = this->get_super();
  253. super->set_charByteSize(1);
  254. }
  255. template<class ImplTraits, class SuperType>
  256. ANTLR_UINT32 UTF16_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 i)
  257. {
  258. return this->LA(i, ClassForwarder< typename ImplTraits::Endianness >() );
  259. }
  260. template<class ImplTraits, class SuperType>
  261. void UTF16_IntStream<ImplTraits, SuperType>::consume()
  262. {
  263. this->consume( ClassForwarder< typename ImplTraits::Endianness >() );
  264. }
  265. template<class ImplTraits, class SuperType>
  266. ANTLR_MARKER UTF16_IntStream<ImplTraits, SuperType>::index()
  267. {
  268. SuperType* input = this->get_super();
  269. return (ANTLR_MARKER)(input->get_nextChar());
  270. }
  271. template<class ImplTraits, class SuperType>
  272. void UTF16_IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint)
  273. {
  274. SuperType* input = this->get_super();
  275. // If the requested seek point is less than the current
  276. // input point, then we assume that we are resetting from a mark
  277. // and do not need to scan, but can just set to there as rewind will
  278. // reset line numbers and so on.
  279. //
  280. if (seekPoint <= (ANTLR_MARKER)(input->get_nextChar()))
  281. {
  282. input->set_nextChar( seekPoint );
  283. }
  284. else
  285. {
  286. // Call consume until we reach the asked for seek point or EOF
  287. //
  288. while( (this->LA(1) != ANTLR_CHARSTREAM_EOF) && (seekPoint < (ANTLR_MARKER)input->get_nextChar() ) )
  289. {
  290. this->consume();
  291. }
  292. }
  293. }
  294. template<class ImplTraits, class SuperType>
  295. void IntStream<ImplTraits, SuperType>::findout_endian_spec(bool machineBigEndian, bool inputBigEndian)
  296. {
  297. // We must install different UTF16 routines according to whether the input
  298. // is the same endianess as the machine we are executing upon or not. If it is not
  299. // then we must install methods that can convert the endianess on the fly as they go
  300. //
  301. if(machineBigEndian == true)
  302. {
  303. // Machine is Big Endian, if the input is also then install the
  304. // methods that do not access input by bytes and reverse them.
  305. // Otherwise install endian aware methods.
  306. //
  307. if (inputBigEndian == true)
  308. {
  309. // Input is machine compatible
  310. //
  311. m_endian_spec = 1;
  312. }
  313. else
  314. {
  315. // Need to use methods that know that the input is little endian
  316. //
  317. m_endian_spec = 2;
  318. }
  319. }
  320. else
  321. {
  322. // Machine is Little Endian, if the input is also then install the
  323. // methods that do not access input by bytes and reverse them.
  324. // Otherwise install endian aware methods.
  325. //
  326. if (inputBigEndian == false)
  327. {
  328. // Input is machine compatible
  329. //
  330. m_endian_spec = 1;
  331. }
  332. else
  333. {
  334. // Need to use methods that know that the input is Big Endian
  335. //
  336. m_endian_spec = 3;
  337. }
  338. }
  339. }
  340. template<class ImplTraits, class SuperType>
  341. void UTF16_IntStream<ImplTraits, SuperType>::setupIntStream(bool machineBigEndian, bool inputBigEndian)
  342. {
  343. SuperType* super = this->get_super();
  344. super->set_charByteSize(2);
  345. this->findout_endian_spec( machineBigEndian, inputBigEndian );
  346. }
  347. template<class ImplTraits, class SuperType>
  348. ANTLR_UINT32 IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 i, ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> )
  349. {
  350. assert( (m_endian_spec >= 1) && (m_endian_spec <= 3));
  351. switch(m_endian_spec)
  352. {
  353. case 1:
  354. return this->LA(i, ClassForwarder<BYTE_AGNOSTIC>() );
  355. break;
  356. case 2:
  357. return this->LA(i, ClassForwarder<ANTLR_LITTLE_ENDIAN>() );
  358. break;
  359. case 3:
  360. return this->LA(i, ClassForwarder<ANTLR_BIG_ENDIAN>() );
  361. break;
  362. default:
  363. break;
  364. }
  365. return 0;
  366. }
  367. template<class ImplTraits, class SuperType>
  368. void IntStream<ImplTraits, SuperType>::consume( ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> )
  369. {
  370. assert( (m_endian_spec >= 1) && (m_endian_spec <= 3));
  371. switch(m_endian_spec)
  372. {
  373. case 1:
  374. this->consume( ClassForwarder<BYTE_AGNOSTIC>() );
  375. break;
  376. case 2:
  377. this->consume( ClassForwarder<ANTLR_LITTLE_ENDIAN>() );
  378. break;
  379. case 3:
  380. this->consume( ClassForwarder<ANTLR_BIG_ENDIAN>() );
  381. break;
  382. default:
  383. break;
  384. }
  385. }
  386. template<class ImplTraits, class SuperType>
  387. ANTLR_UINT32 UTF16_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la, ClassForwarder<BYTE_AGNOSTIC> )
  388. {
  389. SuperType* input;
  390. UTF32 ch;
  391. UTF32 ch2;
  392. UTF16* nextChar;
  393. // Find the input interface and where we are currently pointing to
  394. // in the input stream
  395. //
  396. input = this->get_super;
  397. nextChar = input->get_nextChar();
  398. // If a positive offset then advance forward, else retreat
  399. //
  400. if (la >= 0)
  401. {
  402. while (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )
  403. {
  404. // Advance our copy of the input pointer
  405. //
  406. // Next char in natural machine byte order
  407. //
  408. ch = *nextChar++;
  409. // If we have a surrogate pair then we need to consume
  410. // a following valid LO surrogate.
  411. //
  412. if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
  413. {
  414. // If the 16 bits following the high surrogate are in the source buffer...
  415. //
  416. if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ))
  417. {
  418. // Next character is in natural machine byte order
  419. //
  420. ch2 = *nextChar;
  421. // If it's a valid low surrogate, consume it
  422. //
  423. if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
  424. {
  425. // We consumed one 16 bit character
  426. //
  427. nextChar++;
  428. }
  429. // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
  430. // it.
  431. //
  432. }
  433. // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
  434. // it because the buffer ended
  435. //
  436. }
  437. // Note that we did not check for an invalid low surrogate here, or that fact that the
  438. // lo surrogate was missing. We just picked out one 16 bit character unless the character
  439. // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
  440. //
  441. }
  442. }
  443. else
  444. {
  445. // We need to go backwards from our input point
  446. //
  447. while (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() )
  448. {
  449. // Get the previous 16 bit character
  450. //
  451. ch = *--nextChar;
  452. // If we found a low surrogate then go back one more character if
  453. // the hi surrogate is there
  454. //
  455. if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
  456. {
  457. ch2 = *(nextChar-1);
  458. if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END)
  459. {
  460. // Yes, there is a high surrogate to match it so decrement one more and point to that
  461. //
  462. nextChar--;
  463. }
  464. }
  465. }
  466. }
  467. // Our local copy of nextChar is now pointing to either the correct character or end of file
  468. //
  469. // Input buffer size is always in bytes
  470. //
  471. if ( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ))
  472. {
  473. return ANTLR_CHARSTREAM_EOF;
  474. }
  475. else
  476. {
  477. // Pick up the next 16 character (native machine byte order)
  478. //
  479. ch = *nextChar++;
  480. // If we have a surrogate pair then we need to consume
  481. // a following valid LO surrogate.
  482. //
  483. if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
  484. {
  485. // If the 16 bits following the high surrogate are in the source buffer...
  486. //
  487. if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
  488. {
  489. // Next character is in natural machine byte order
  490. //
  491. ch2 = *nextChar;
  492. // If it's a valid low surrogate, consume it
  493. //
  494. if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
  495. {
  496. // Construct the UTF32 code point
  497. //
  498. ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
  499. + (ch2 - UNI_SUR_LOW_START) + halfBase;
  500. }
  501. // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
  502. // it.
  503. //
  504. }
  505. // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
  506. // it because the buffer ended
  507. //
  508. }
  509. }
  510. return ch;
  511. }
  512. template<class ImplTraits, class SuperType>
  513. ANTLR_UINT32 UTF16_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la, ClassForwarder<ANTLR_LITTLE_ENDIAN> )
  514. {
  515. SuperType* input;
  516. UTF32 ch;
  517. UTF32 ch2;
  518. ANTLR_UCHAR* nextChar;
  519. // Find the input interface and where we are currently pointing to
  520. // in the input stream
  521. //
  522. input = this->get_super();
  523. nextChar = input->get_nextChar();
  524. // If a positive offset then advance forward, else retreat
  525. //
  526. if (la >= 0)
  527. {
  528. while (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )
  529. {
  530. // Advance our copy of the input pointer
  531. //
  532. // Next char in Little Endian byte order
  533. //
  534. ch = (*nextChar) + (*(nextChar+1) << 8);
  535. nextChar += 2;
  536. // If we have a surrogate pair then we need to consume
  537. // a following valid LO surrogate.
  538. //
  539. if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
  540. {
  541. // If the 16 bits following the high surrogate are in the source buffer...
  542. //
  543. if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ))
  544. {
  545. // Next character is in little endian byte order
  546. //
  547. ch2 = (*nextChar) + (*(nextChar+1) << 8);
  548. // If it's a valid low surrogate, consume it
  549. //
  550. if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
  551. {
  552. // We consumed one 16 bit character
  553. //
  554. nextChar += 2;
  555. }
  556. // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
  557. // it.
  558. //
  559. }
  560. // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
  561. // it because the buffer ended
  562. //
  563. }
  564. // Note that we did not check for an invalid low surrogate here, or that fact that the
  565. // lo surrogate was missing. We just picked out one 16 bit character unless the character
  566. // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
  567. //
  568. }
  569. }
  570. else
  571. {
  572. // We need to go backwards from our input point
  573. //
  574. while (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() )
  575. {
  576. // Get the previous 16 bit character
  577. //
  578. ch = (*nextChar - 2) + ((*nextChar -1) << 8);
  579. nextChar -= 2;
  580. // If we found a low surrogate then go back one more character if
  581. // the hi surrogate is there
  582. //
  583. if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
  584. {
  585. ch2 = (*nextChar - 2) + ((*nextChar -1) << 8);
  586. if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END)
  587. {
  588. // Yes, there is a high surrogate to match it so decrement one more and point to that
  589. //
  590. nextChar -=2;
  591. }
  592. }
  593. }
  594. }
  595. // Our local copy of nextChar is now pointing to either the correct character or end of file
  596. //
  597. // Input buffer size is always in bytes
  598. //
  599. if ( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
  600. {
  601. return ANTLR_CHARSTREAM_EOF;
  602. }
  603. else
  604. {
  605. // Pick up the next 16 character (little endian byte order)
  606. //
  607. ch = (*nextChar) + (*(nextChar+1) << 8);
  608. nextChar += 2;
  609. // If we have a surrogate pair then we need to consume
  610. // a following valid LO surrogate.
  611. //
  612. if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
  613. {
  614. // If the 16 bits following the high surrogate are in the source buffer...
  615. //
  616. if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
  617. {
  618. // Next character is in little endian byte order
  619. //
  620. ch2 = (*nextChar) + (*(nextChar+1) << 8);
  621. // If it's a valid low surrogate, consume it
  622. //
  623. if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
  624. {
  625. // Construct the UTF32 code point
  626. //
  627. ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
  628. + (ch2 - UNI_SUR_LOW_START) + halfBase;
  629. }
  630. // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
  631. // it.
  632. //
  633. }
  634. // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
  635. // it because the buffer ended
  636. //
  637. }
  638. }
  639. return ch;
  640. }
  641. template<class ImplTraits, class SuperType>
  642. ANTLR_UINT32 UTF16_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la, ClassForwarder<ANTLR_BIG_ENDIAN> )
  643. {
  644. SuperType* input;
  645. UTF32 ch;
  646. UTF32 ch2;
  647. ANTLR_UCHAR* nextChar;
  648. // Find the input interface and where we are currently pointing to
  649. // in the input stream
  650. //
  651. input = this->get_super();
  652. nextChar = input->get_nextChar();
  653. // If a positive offset then advance forward, else retreat
  654. //
  655. if (la >= 0)
  656. {
  657. while (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )
  658. {
  659. // Advance our copy of the input pointer
  660. //
  661. // Next char in Big Endian byte order
  662. //
  663. ch = ((*nextChar) << 8) + *(nextChar+1);
  664. nextChar += 2;
  665. // If we have a surrogate pair then we need to consume
  666. // a following valid LO surrogate.
  667. //
  668. if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
  669. {
  670. // If the 16 bits following the high surrogate are in the source buffer...
  671. //
  672. if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
  673. {
  674. // Next character is in big endian byte order
  675. //
  676. ch2 = ((*nextChar) << 8) + *(nextChar+1);
  677. // If it's a valid low surrogate, consume it
  678. //
  679. if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
  680. {
  681. // We consumed one 16 bit character
  682. //
  683. nextChar += 2;
  684. }
  685. // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
  686. // it.
  687. //
  688. }
  689. // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
  690. // it because the buffer ended
  691. //
  692. }
  693. // Note that we did not check for an invalid low surrogate here, or that fact that the
  694. // lo surrogate was missing. We just picked out one 16 bit character unless the character
  695. // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
  696. //
  697. }
  698. }
  699. else
  700. {
  701. // We need to go backwards from our input point
  702. //
  703. while (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() )
  704. {
  705. // Get the previous 16 bit character
  706. //
  707. ch = ((*nextChar - 2) << 8) + (*nextChar -1);
  708. nextChar -= 2;
  709. // If we found a low surrogate then go back one more character if
  710. // the hi surrogate is there
  711. //
  712. if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
  713. {
  714. ch2 = ((*nextChar - 2) << 8) + (*nextChar -1);
  715. if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END)
  716. {
  717. // Yes, there is a high surrogate to match it so decrement one more and point to that
  718. //
  719. nextChar -=2;
  720. }
  721. }
  722. }
  723. }
  724. // Our local copy of nextChar is now pointing to either the correct character or end of file
  725. //
  726. // Input buffer size is always in bytes
  727. //
  728. if ( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
  729. {
  730. return ANTLR_CHARSTREAM_EOF;
  731. }
  732. else
  733. {
  734. // Pick up the next 16 character (big endian byte order)
  735. //
  736. ch = ((*nextChar) << 8) + *(nextChar+1);
  737. nextChar += 2;
  738. // If we have a surrogate pair then we need to consume
  739. // a following valid LO surrogate.
  740. //
  741. if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
  742. {
  743. // If the 16 bits following the high surrogate are in the source buffer...
  744. //
  745. if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
  746. {
  747. // Next character is in big endian byte order
  748. //
  749. ch2 = ((*nextChar) << 8) + *(nextChar+1);
  750. // If it's a valid low surrogate, consume it
  751. //
  752. if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
  753. {
  754. // Construct the UTF32 code point
  755. //
  756. ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
  757. + (ch2 - UNI_SUR_LOW_START) + halfBase;
  758. }
  759. // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
  760. // it.
  761. //
  762. }
  763. // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
  764. // it because the buffer ended
  765. //
  766. }
  767. }
  768. return ch;
  769. }
  770. template<class ImplTraits, class SuperType>
  771. void UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<BYTE_AGNOSTIC> )
  772. {
  773. SuperType* input;
  774. UTF32 ch;
  775. UTF32 ch2;
  776. input = this->get_super();
  777. // Buffer size is always in bytes
  778. //
  779. if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
  780. {
  781. // Indicate one more character in this line
  782. //
  783. input->inc_charPositionInLine();
  784. if ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar())
  785. {
  786. // Reset for start of a new line of input
  787. //
  788. input->inc_line();
  789. input->set_charPositionInLine(0);
  790. input->set_currentLine( input->get_nextChar() + 1 );
  791. }
  792. // Increment to next character position, accounting for any surrogates
  793. //
  794. // Next char in natural machine byte order
  795. //
  796. ch = *(input->get_nextChar());
  797. // We consumed one 16 bit character
  798. //
  799. input->set_nextChar( input->get_nextChar() + 1 );
  800. // If we have a surrogate pair then we need to consume
  801. // a following valid LO surrogate.
  802. //
  803. if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
  804. // If the 16 bits following the high surrogate are in the source buffer...
  805. //
  806. if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
  807. {
  808. // Next character is in natural machine byte order
  809. //
  810. ch2 = *(input->get_nextChar());
  811. // If it's a valid low surrogate, consume it
  812. //
  813. if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
  814. {
  815. // We consumed one 16 bit character
  816. //
  817. input->set_nextChar( input->get_nextChar() + 1 );
  818. }
  819. // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
  820. // it.
  821. //
  822. }
  823. // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
  824. // it because the buffer ended
  825. //
  826. }
  827. // Note that we did not check for an invalid low surrogate here, or that fact that the
  828. // lo surrogate was missing. We just picked out one 16 bit character unless the character
  829. // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
  830. //
  831. }
  832. }
  833. template<class ImplTraits, class SuperType>
  834. void UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<ANTLR_LITTLE_ENDIAN> )
  835. {
  836. SuperType* input;
  837. UTF32 ch;
  838. UTF32 ch2;
  839. input = this->get_super();
  840. // Buffer size is always in bytes
  841. //
  842. if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
  843. {
  844. // Indicate one more character in this line
  845. //
  846. input->inc_charPositionInLine();
  847. if ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar())
  848. {
  849. // Reset for start of a new line of input
  850. //
  851. input->inc_line();
  852. input->set_charPositionInLine(0);
  853. input->set_currentLine(input->get_nextChar() + 1);
  854. }
  855. // Increment to next character position, accounting for any surrogates
  856. //
  857. // Next char in litle endian form
  858. //
  859. ch = *((ANTLR_UINT8*)input->get_nextChar()) + (*((ANTLR_UINT8*)input->get_nextChar() + 1) <<8);
  860. // We consumed one 16 bit character
  861. //
  862. input->set_nextChar( input->get_nextChar() + 1);
  863. // If we have a surrogate pair then we need to consume
  864. // a following valid LO surrogate.
  865. //
  866. if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
  867. {
  868. // If the 16 bits following the high surrogate are in the source buffer...
  869. //
  870. if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
  871. {
  872. ch2 = *((ANTLR_UINT8*)input->get_nextChar()) + (*((ANTLR_UINT8*)input->get_nextChar() + 1) <<8);
  873. // If it's a valid low surrogate, consume it
  874. //
  875. if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
  876. {
  877. // We consumed one 16 bit character
  878. //
  879. input->set_nextChar( input->get_nextChar() + 1);
  880. }
  881. // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
  882. // it.
  883. //
  884. }
  885. // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
  886. // it because the buffer ended
  887. //
  888. }
  889. // Note that we did not check for an invalid low surrogate here, or that fact that the
  890. // lo surrogate was missing. We just picked out one 16 bit character unless the character
  891. // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
  892. //
  893. }
  894. }
  895. template<class ImplTraits, class SuperType>
  896. void UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<ANTLR_BIG_ENDIAN> )
  897. {
  898. SuperType* input;
  899. UTF32 ch;
  900. UTF32 ch2;
  901. input = this->get_super();
  902. // Buffer size is always in bytes
  903. //
  904. if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
  905. {
  906. // Indicate one more character in this line
  907. //
  908. input->inc_charPositionInLine();
  909. if ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar())
  910. {
  911. // Reset for start of a new line of input
  912. //
  913. input->inc_line();
  914. input->set_charPositionInLine(0);
  915. input->set_currentLine(input->get_nextChar() + 1);
  916. }
  917. // Increment to next character position, accounting for any surrogates
  918. //
  919. // Next char in big endian form
  920. //
  921. ch = *((ANTLR_UINT8*)input->get_nextChar() + 1) + (*((ANTLR_UINT8*)input->get_nextChar() ) <<8);
  922. // We consumed one 16 bit character
  923. //
  924. input->set_nextChar( input->get_nextChar() + 1);
  925. // If we have a surrogate pair then we need to consume
  926. // a following valid LO surrogate.
  927. //
  928. if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
  929. {
  930. // If the 16 bits following the high surrogate are in the source buffer...
  931. //
  932. if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
  933. {
  934. // Big endian
  935. //
  936. ch2 = *((ANTLR_UINT8*)input->get_nextChar() + 1) + (*((ANTLR_UINT8*)input->get_nextChar() ) <<8);
  937. // If it's a valid low surrogate, consume it
  938. //
  939. if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
  940. {
  941. // We consumed one 16 bit character
  942. //
  943. input->set_nextChar( input->get_nextChar() + 1);
  944. }
  945. // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
  946. // it.
  947. //
  948. }
  949. // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
  950. // it because the buffer ended
  951. //
  952. }
  953. // Note that we did not check for an invalid low surrogate here, or that fact that the
  954. // lo surrogate was missing. We just picked out one 16 bit character unless the character
  955. // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
  956. //
  957. }
  958. }
  959. template<class ImplTraits, class SuperType>
  960. ANTLR_UINT32 UTF32_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 i)
  961. {
  962. return this->LA( i, ClassForwarder<typename ImplTraits::Endianness>() );
  963. }
  964. template<class ImplTraits, class SuperType>
  965. ANTLR_MARKER UTF32_IntStream<ImplTraits, SuperType>::index()
  966. {
  967. SuperType* input = this->get_super();
  968. return (ANTLR_MARKER)(input->get_nextChar());
  969. }
  970. template<class ImplTraits, class SuperType>
  971. void UTF32_IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint)
  972. {
  973. SuperType* input;
  974. input = this->get_super();
  975. // If the requested seek point is less than the current
  976. // input point, then we assume that we are resetting from a mark
  977. // and do not need to scan, but can just set to there as rewind will
  978. // reset line numbers and so on.
  979. //
  980. if (seekPoint <= (ANTLR_MARKER)(input->get_nextChar()))
  981. {
  982. input->set_nextChar( static_cast<typename ImplTraits::DataType*>(seekPoint) );
  983. }
  984. else
  985. {
  986. // Call consume until we reach the asked for seek point or EOF
  987. //
  988. while( (this->LA(1) != ANTLR_CHARSTREAM_EOF) && (seekPoint < (ANTLR_MARKER)input->get_nextChar()) )
  989. {
  990. this->consume();
  991. }
  992. }
  993. }
  994. template<class ImplTraits, class SuperType>
  995. void UTF32_IntStream<ImplTraits, SuperType>::setupIntStream(bool machineBigEndian, bool inputBigEndian)
  996. {
  997. SuperType* super = this->get_super();
  998. super->set_charByteSize(4);
  999. this->findout_endian_spec(machineBigEndian, inputBigEndian);
  1000. }
  1001. template<class ImplTraits, class SuperType>
  1002. ANTLR_UINT32 UTF32_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la, ClassForwarder<BYTE_AGNOSTIC> )
  1003. {
  1004. SuperType* input = this->get_super();
  1005. if (( input->get_nextChar() + la - 1) >= (input->get_data() + input->get_sizeBuf()/4 ))
  1006. {
  1007. return ANTLR_CHARSTREAM_EOF;
  1008. }
  1009. else
  1010. {
  1011. return (ANTLR_UCHAR)(*(input->get_nextChar() + la - 1));
  1012. }
  1013. }
  1014. template<class ImplTraits, class SuperType>
  1015. ANTLR_UINT32 UTF32_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la, ClassForwarder<ANTLR_LITTLE_ENDIAN> )
  1016. {
  1017. SuperType* input = this->get_super();
  1018. if (( input->get_nextChar() + la - 1) >= (input->get_data() + input->get_sizeBuf()/4 ))
  1019. {
  1020. return ANTLR_CHARSTREAM_EOF;
  1021. }
  1022. else
  1023. {
  1024. ANTLR_UCHAR c;
  1025. c = (ANTLR_UCHAR)(*(input->get_nextChar() + la - 1));
  1026. // Swap Endianess to Big Endian
  1027. //
  1028. return (c>>24) | ((c<<8) & 0x00FF0000) | ((c>>8) & 0x0000FF00) | (c<<24);
  1029. }
  1030. }
  1031. template<class ImplTraits, class SuperType>
  1032. ANTLR_UINT32 UTF32_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la, ClassForwarder<ANTLR_BIG_ENDIAN> )
  1033. {
  1034. SuperType* input = this->get_super();
  1035. if (( input->get_nextChar() + la - 1) >= (input->get_data() + input->get_sizeBuf()/4 ))
  1036. {
  1037. return ANTLR_CHARSTREAM_EOF;
  1038. }
  1039. else
  1040. {
  1041. ANTLR_UCHAR c;
  1042. c = (ANTLR_UCHAR)(*(input->get_nextChar() + la - 1));
  1043. // Swap Endianess to Little Endian
  1044. //
  1045. return (c>>24) | ((c<<8) & 0x00FF0000) | ((c>>8) & 0x0000FF00) | (c<<24);
  1046. }
  1047. }
  1048. template<class ImplTraits, class SuperType>
  1049. void UTF32_IntStream<ImplTraits, SuperType>::consume()
  1050. {
  1051. SuperType* input = this->get_super();
  1052. // SizeBuf is always in bytes
  1053. //
  1054. if ( input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/4 ))
  1055. {
  1056. /* Indicate one more character in this line
  1057. */
  1058. input->inc_charPositionInLine();
  1059. if ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar())
  1060. {
  1061. /* Reset for start of a new line of input
  1062. */
  1063. input->inc_line();
  1064. input->set_charPositionInLine(0);
  1065. input->set_currentLine( input->get_nextChar() + 1 );
  1066. }
  1067. /* Increment to next character position
  1068. */
  1069. input->set_nextChar( input->get_nextChar() + 1 );
  1070. }
  1071. }
  1072. template<class ImplTraits, class SuperType>
  1073. void UTF8_IntStream<ImplTraits, SuperType>::setupIntStream(bool, bool)
  1074. {
  1075. SuperType* super = this->get_super();
  1076. super->set_charByteSize(0);
  1077. }
  1078. // ------------------------------------------------------
  1079. // Following is from Unicode.org (see antlr3convertutf.c)
  1080. //
  1081. /// Index into the table below with the first byte of a UTF-8 sequence to
  1082. /// get the number of trailing bytes that are supposed to follow it.
  1083. /// Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
  1084. /// left as-is for anyone who may want to do such conversion, which was
  1085. /// allowed in earlier algorithms.
  1086. ///
  1087. template<class ImplTraits, class SuperType>
  1088. const ANTLR_UINT32* UTF8_IntStream<ImplTraits, SuperType>::TrailingBytesForUTF8()
  1089. {
  1090. static const ANTLR_UINT32 trailingBytesForUTF8[256] = {
  1091. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  1092. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  1093. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  1094. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  1095. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  1096. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  1097. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1098. 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
  1099. };
  1100. return trailingBytesForUTF8;
  1101. }
  1102. /// Magic values subtracted from a buffer value during UTF8 conversion.
  1103. /// This table contains as many values as there might be trailing bytes
  1104. /// in a UTF-8 sequence.
  1105. ///
  1106. template<class ImplTraits, class SuperType>
  1107. const UTF32* UTF8_IntStream<ImplTraits, SuperType>::OffsetsFromUTF8()
  1108. {
  1109. static const UTF32 offsetsFromUTF8[6] =
  1110. { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
  1111. 0x03C82080UL, 0xFA082080UL, 0x82082080UL
  1112. };
  1113. return offsetsFromUTF8;
  1114. }
  1115. // End of Unicode.org tables
  1116. // -------------------------
  1117. /** \brief Consume the next character in a UTF8 input stream
  1118. *
  1119. * \param input Input stream context pointer
  1120. */
  1121. template<class ImplTraits, class SuperType>
  1122. void UTF8_IntStream<ImplTraits, SuperType>::consume()
  1123. {
  1124. SuperType* input = this->get_super();
  1125. const ANTLR_UINT32* trailingBytesForUTF8 = UTF8_IntStream::TrailingBytesForUTF8();
  1126. const UTF32* offsetsFromUTF8 = UTF8_IntStream::OffsetsFromUTF8();
  1127. ANTLR_UINT32 extraBytesToRead;
  1128. ANTLR_UCHAR ch;
  1129. ANTLR_UINT8* nextChar;
  1130. nextChar = input->get_nextChar();
  1131. if (nextChar < (input->get_data() + input->get_sizeBuf()))
  1132. {
  1133. // Indicate one more character in this line
  1134. //
  1135. input->inc_charPositionInLine();
  1136. // Are there more bytes needed to make up the whole thing?
  1137. //
  1138. extraBytesToRead = trailingBytesForUTF8[*nextChar];
  1139. if ((nextChar + extraBytesToRead) >= (input->get_data() + input->get_sizeBuf()))
  1140. {
  1141. input->set_nextChar( input->get_data() + input->get_sizeBuf() );
  1142. return;
  1143. }
  1144. // Cases deliberately fall through (see note A in antlrconvertutf.c)
  1145. // Legal UTF8 is only 4 bytes but 6 bytes could be used in old UTF8 so
  1146. // we allow it.
  1147. //
  1148. ch = 0;
  1149. switch (extraBytesToRead)
  1150. {
  1151. case 5: ch += *nextChar++; ch <<= 6;
  1152. case 4: ch += *nextChar++; ch <<= 6;
  1153. case 3: ch += *nextChar++; ch <<= 6;
  1154. case 2: ch += *nextChar++; ch <<= 6;
  1155. case 1: ch += *nextChar++; ch <<= 6;
  1156. case 0: ch += *nextChar++;
  1157. }
  1158. // Magically correct the input value
  1159. //
  1160. ch -= offsetsFromUTF8[extraBytesToRead];
  1161. if (ch == input->get_newlineChar())
  1162. {
  1163. /* Reset for start of a new line of input
  1164. */
  1165. input->inc_line();
  1166. input->set_charPositionInLine(0);
  1167. input->set_currentLine(nextChar);
  1168. }
  1169. // Update input pointer
  1170. //
  1171. input->set_nextChar(nextChar);
  1172. }
  1173. }
  1174. /** \brief Return the input element assuming a UTF8 input
  1175. *
  1176. * \param[in] input Input stream context pointer
  1177. * \param[in] la 1 based offset of next input stream element
  1178. *
  1179. * \return Next input character in internal ANTLR3 encoding (UTF32)
  1180. */
  1181. template<class ImplTraits, class SuperType>
  1182. ANTLR_UCHAR UTF8_IntStream<ImplTraits, SuperType>::LA(ANTLR_INT32 la)
  1183. {
  1184. SuperType* input = this->get_super();
  1185. const ANTLR_UINT32* trailingBytesForUTF8 = UTF8_IntStream::TrailingBytesForUTF8();
  1186. const UTF32* offsetsFromUTF8 = UTF8_IntStream::OffsetsFromUTF8();
  1187. ANTLR_UINT32 extraBytesToRead;
  1188. ANTLR_UCHAR ch;
  1189. ANTLR_UINT8* nextChar;
  1190. nextChar = input->get_nextChar();
  1191. // Do we need to traverse forwards or backwards?
  1192. // - LA(0) is treated as LA(1) and we assume that the nextChar is
  1193. // already positioned.
  1194. // - LA(n+) ; n>1 means we must traverse forward n-1 characters catering for UTF8 encoding
  1195. // - LA(-n) means we must traverse backwards n chracters
  1196. //
  1197. if (la > 1) {
  1198. // Make sure that we have at least one character left before trying to
  1199. // loop through the buffer.
  1200. //
  1201. if (nextChar < (input->get_data() + input->get_sizeBuf()))
  1202. {
  1203. // Now traverse n-1 characters forward
  1204. //
  1205. while (--la > 0)
  1206. {
  1207. // Does the next character require trailing bytes?
  1208. // If so advance the pointer by that many bytes as well as advancing
  1209. // one position for what will be at least a single byte character.
  1210. //
  1211. nextChar += trailingBytesForUTF8[*nextChar] + 1;
  1212. // Does that calculation take us past the byte length of the buffer?
  1213. //
  1214. if (nextChar >= (input->get_data() + input->get_sizeBuf()))
  1215. {
  1216. return ANTLR_CHARSTREAM_EOF;
  1217. }
  1218. }
  1219. }
  1220. else
  1221. {
  1222. return ANTLR_CHARSTREAM_EOF;
  1223. }
  1224. }
  1225. else
  1226. {
  1227. // LA is negative so we decrease the pointer by n character positions
  1228. //
  1229. while (nextChar > input->get_data() && la++ < 0)
  1230. {
  1231. // Traversing backwards in UTF8 means decermenting by one
  1232. // then continuing to decrement while ever a character pattern
  1233. // is flagged as being a trailing byte of an encoded code point.
  1234. // Trailing UTF8 bytes always start with 10 in binary. We assumne that
  1235. // the UTF8 is well formed and do not check boundary conditions
  1236. //
  1237. nextChar--;
  1238. while ((*nextChar & 0xC0) == 0x80)
  1239. {
  1240. nextChar--;
  1241. }
  1242. }
  1243. }
  1244. // nextChar is now pointing at the UTF8 encoded character that we need to
  1245. // decode and return.
  1246. //
  1247. // Are there more bytes needed to make up the whole thing?
  1248. //
  1249. extraBytesToRead = trailingBytesForUTF8[*nextChar];
  1250. if (nextChar + extraBytesToRead >= (input->get_data() + input->get_sizeBuf()))
  1251. {
  1252. return ANTLR_CHARSTREAM_EOF;
  1253. }
  1254. // Cases deliberately fall through (see note A in antlrconvertutf.c)
  1255. //
  1256. ch = 0;
  1257. switch (extraBytesToRead)
  1258. {
  1259. case 5: ch += *nextChar++; ch <<= 6;
  1260. case 4: ch += *nextChar++; ch <<= 6;
  1261. case 3: ch += *nextChar++; ch <<= 6;
  1262. case 2: ch += *nextChar++; ch <<= 6;
  1263. case 1: ch += *nextChar++; ch <<= 6;
  1264. case 0: ch += *nextChar++;
  1265. }
  1266. // Magically correct the input value
  1267. //
  1268. ch -= offsetsFromUTF8[extraBytesToRead];
  1269. return ch;
  1270. }
  1271. template<class ImplTraits>
  1272. TokenIntStream<ImplTraits>::TokenIntStream()
  1273. {
  1274. m_cachedSize = 0;
  1275. }
  1276. template<class ImplTraits>
  1277. ANTLR_UINT32 TokenIntStream<ImplTraits>::get_cachedSize() const
  1278. {
  1279. return m_cachedSize;
  1280. }
  1281. template<class ImplTraits>
  1282. void TokenIntStream<ImplTraits>::set_cachedSize( ANTLR_UINT32 cachedSize )
  1283. {
  1284. m_cachedSize = cachedSize;
  1285. }
  1286. /** Move the input pointer to the next incoming token. The stream
  1287. * must become active with LT(1) available. consume() simply
  1288. * moves the input pointer so that LT(1) points at the next
  1289. * input symbol. Consume at least one token.
  1290. *
  1291. * Walk past any token not on the channel the parser is listening to.
  1292. */
  1293. template<class ImplTraits>
  1294. void TokenIntStream<ImplTraits>::consume()
  1295. {
  1296. TokenStreamType* cts = static_cast<TokenStreamType*>(this);
  1297. if((ANTLR_UINT32)cts->get_p() < m_cachedSize )
  1298. {
  1299. cts->inc_p();
  1300. cts->set_p( cts->skipOffTokenChannels(cts->get_p()) );
  1301. }
  1302. }
  1303. template<class ImplTraits>
  1304. void TokenIntStream<ImplTraits>::consumeInitialHiddenTokens()
  1305. {
  1306. ANTLR_MARKER first;
  1307. ANTLR_INT32 i;
  1308. TokenStreamType* ts;
  1309. ts = this->get_super();
  1310. first = this->index();
  1311. for (i=0; i<first; i++)
  1312. {
  1313. ts->get_debugger()->consumeHiddenToken(ts->get(i));
  1314. }
  1315. ts->set_initialStreamState(false);
  1316. }
  1317. template<class ImplTraits>
  1318. ANTLR_UINT32 TokenIntStream<ImplTraits>::LA( ANTLR_INT32 i )
  1319. {
  1320. const CommonTokenType* tok;
  1321. TokenStreamType* ts = static_cast<TokenStreamType*>(this);
  1322. tok = ts->LT(i);
  1323. if (tok != NULL)
  1324. {
  1325. return tok->get_type();
  1326. }
  1327. else
  1328. {
  1329. return CommonTokenType::TOKEN_INVALID;
  1330. }
  1331. }
  1332. template<class ImplTraits>
  1333. ANTLR_MARKER TokenIntStream<ImplTraits>::mark()
  1334. {
  1335. BaseType::m_lastMarker = this->index();
  1336. return BaseType::m_lastMarker;
  1337. }
  1338. template<class ImplTraits>
  1339. ANTLR_UINT32 TokenIntStream<ImplTraits>::size()
  1340. {
  1341. if (this->get_cachedSize() > 0)
  1342. {
  1343. return this->get_cachedSize();
  1344. }
  1345. TokenStreamType* cts = this->get_super();
  1346. this->set_cachedSize( static_cast<ANTLR_UINT32>(cts->get_tokens().size()) );
  1347. return this->get_cachedSize();
  1348. }
  1349. template<class ImplTraits>
  1350. void TokenIntStream<ImplTraits>::release()
  1351. {
  1352. return;
  1353. }
  1354. template<class ImplTraits>
  1355. ANTLR_MARKER TokenIntStream<ImplTraits>::tindex()
  1356. {
  1357. return this->get_super()->get_p();
  1358. }
  1359. template<class ImplTraits>
  1360. void TokenIntStream<ImplTraits>::rewindLast()
  1361. {
  1362. this->rewind( this->get_lastMarker() );
  1363. }
  1364. template<class ImplTraits>
  1365. void TokenIntStream<ImplTraits>::rewind(ANTLR_MARKER marker)
  1366. {
  1367. return this->seek(marker);
  1368. }
  1369. template<class ImplTraits>
  1370. void TokenIntStream<ImplTraits>::seek(ANTLR_MARKER index)
  1371. {
  1372. TokenStreamType* cts = static_cast<TokenStreamType*>(this);
  1373. cts->set_p( static_cast<ANTLR_INT32>(index) );
  1374. }
  1375. /// Return a string that represents the name assoicated with the input source
  1376. ///
  1377. /// /param[in] is The ANTLR3_INT_STREAM interface that is representing this token stream.
  1378. ///
  1379. /// /returns
  1380. /// /implements ANTLR3_INT_STREAM_struct::getSourceName()
  1381. ///
  1382. template<class ImplTraits>
  1383. typename TokenIntStream<ImplTraits>::StringType
  1384. TokenIntStream<ImplTraits>::getSourceName()
  1385. {
  1386. // Slightly convoluted as we must trace back to the lexer's input source
  1387. // via the token source. The streamName that is here is not initialized
  1388. // because this is a token stream, not a file or string stream, which are the
  1389. // only things that have a context for a source name.
  1390. //
  1391. return this->get_super()->get_tokenSource()->get_fileName();
  1392. }
  1393. template<class ImplTraits>
  1394. void TreeNodeIntStream<ImplTraits>::consume()
  1395. {
  1396. TreeNodeStreamType* ctns = this->get_super();
  1397. if( ctns->get_p() == -1 )
  1398. ctns->fillBufferRoot();
  1399. ctns->inc_p();
  1400. }
  1401. template<class ImplTraits>
  1402. ANTLR_MARKER TreeNodeIntStream<ImplTraits>::tindex()
  1403. {
  1404. TreeNodeStreamType* ctns = this->get_super();
  1405. return (ANTLR_MARKER)(ctns->get_p());
  1406. }
  1407. template<class ImplTraits>
  1408. ANTLR_UINT32 TreeNodeIntStream<ImplTraits>::LA(ANTLR_INT32 i)
  1409. {
  1410. TreeNodeStreamType* tns = this->get_super();
  1411. // Ask LT for the 'token' at that position
  1412. //
  1413. TreeTypePtr t = tns->LT(i);
  1414. if (t == NULL)
  1415. {
  1416. return CommonTokenType::TOKEN_INVALID;
  1417. }
  1418. // Token node was there so return the type of it
  1419. //
  1420. return t->get_type();
  1421. }
  1422. template<class ImplTraits>
  1423. ANTLR_MARKER TreeNodeIntStream<ImplTraits>::mark()
  1424. {
  1425. TreeNodeStreamType* ctns = this->get_super();
  1426. if (ctns->get_p() == -1)
  1427. {
  1428. ctns->fillBufferRoot();
  1429. }
  1430. // Return the current mark point
  1431. //
  1432. this->set_lastMarker( this->index() );
  1433. return this->get_lastMarker();
  1434. }
  1435. template<class ImplTraits>
  1436. void TreeNodeIntStream<ImplTraits>::release(ANTLR_MARKER /*marker*/)
  1437. {
  1438. }
  1439. template<class ImplTraits>
  1440. void TreeNodeIntStream<ImplTraits>::rewindMark(ANTLR_MARKER marker)
  1441. {
  1442. this->seek(marker);
  1443. }
  1444. template<class ImplTraits>
  1445. void TreeNodeIntStream<ImplTraits>::rewindLast()
  1446. {
  1447. this->seek( this->get_lastMarker() );
  1448. }
  1449. template<class ImplTraits>
  1450. void TreeNodeIntStream<ImplTraits>::seek(ANTLR_MARKER index)
  1451. {
  1452. TreeNodeStreamType* ctns = this->get_super();
  1453. ctns->set_p( ANTLR_UINT32_CAST(index) );
  1454. }
  1455. template<class ImplTraits>
  1456. ANTLR_UINT32 TreeNodeIntStream<ImplTraits>::size()
  1457. {
  1458. TreeNodeStreamType* ctns = this->get_super();
  1459. if (ctns->get_p() == -1)
  1460. {
  1461. ctns->fillBufferRoot();
  1462. }
  1463. return ctns->get_nodes().size();
  1464. }
  1465. }