antlr3input.inl 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619
  1. namespace antlr3 {
  2. template<class ImplTraits>
  3. InputStream<ImplTraits>::InputStream(const ANTLR_UINT8* fileName, ANTLR_UINT32 encoding)
  4. {
  5. // First order of business is to read the file into some buffer space
  6. // as just straight 8 bit bytes. Then we will work out the encoding and
  7. // byte order and adjust the API functions that are installed for the
  8. // default 8Bit stream accordingly.
  9. //
  10. this->createFileStream(fileName);
  11. // We have the data in memory now so we can deal with it according to
  12. // the encoding scheme we were given by the user.
  13. //
  14. m_encoding = encoding;
  15. // Now we need to work out the endian type and install any
  16. // API functions that differ from 8Bit
  17. //
  18. this->setupInputStream();
  19. // Now we can set up the file name
  20. //
  21. BaseType::m_streamName = (const char* )fileName;
  22. m_fileName = BaseType::m_streamName;
  23. }
  24. template<class ImplTraits>
  25. InputStream<ImplTraits>::InputStream(const ANTLR_UINT8* data, ANTLR_UINT32 encoding, ANTLR_UINT32 size, ANTLR_UINT8* name)
  26. {
  27. // First order of business is to set up the stream and install the data pointer.
  28. // Then we will work out the encoding and byte order and adjust the API functions that are installed for the
  29. // default 8Bit stream accordingly.
  30. //
  31. this->createStringStream(data);
  32. // Size (in bytes) of the given 'string'
  33. //
  34. m_sizeBuf = size;
  35. // We have the data in memory now so we can deal with it according to
  36. // the encoding scheme we were given by the user.
  37. //
  38. m_encoding = encoding;
  39. // Now we need to work out the endian type and install any
  40. // API functions that differ from 8Bit
  41. //
  42. this->setupInputStream();
  43. // Now we can set up the file name
  44. //
  45. BaseType::m_streamName = (name == NULL ) ? "" : (const char*)name;
  46. m_fileName = BaseType::m_streamName;
  47. }
  48. template<class ImplTraits>
  49. void InputStream<ImplTraits>::createStringStream(const ANTLR_UINT8* data)
  50. {
  51. if (data == NULL)
  52. {
  53. ParseNullStringException ex;
  54. throw ex;
  55. }
  56. // Structure was allocated correctly, now we can install the pointer
  57. //
  58. m_data = data;
  59. m_isAllocated = false;
  60. // Call the common 8 bit input stream handler
  61. // initialization.
  62. //
  63. this->genericSetupStream();
  64. }
  65. template<class ImplTraits>
  66. void InputStream<ImplTraits>::createFileStream(const ANTLR_UINT8* fileName)
  67. {
  68. if (fileName == NULL)
  69. {
  70. ParseFileAbsentException ex;
  71. throw ex;
  72. }
  73. // Structure was allocated correctly, now we can read the file.
  74. //
  75. FileUtils<ImplTraits>::AntlrRead8Bit(this, fileName);
  76. // Call the common 8 bit input stream handler
  77. // initialization.
  78. //
  79. this->genericSetupStream();
  80. }
  81. template<class ImplTraits>
  82. void InputStream<ImplTraits>::genericSetupStream()
  83. {
  84. this->set_charByteSize(1);
  85. /* Set up the input stream brand new
  86. */
  87. this->reset();
  88. /* Install default line separator character (it can be replaced
  89. * by the grammar programmer later)
  90. */
  91. this->set_newLineChar((ANTLR_UCHAR)'\n');
  92. }
  93. template<class ImplTraits>
  94. InputStream<ImplTraits>::~InputStream()
  95. {
  96. // Free the input stream buffer if we allocated it
  97. //
  98. if (m_isAllocated && (m_data != NULL))
  99. AllocPolicyType::free((void*)m_data); //const_cast is required
  100. }
  101. template<class ImplTraits>
  102. ANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_data() const
  103. {
  104. return m_data;
  105. }
  106. template<class ImplTraits>
  107. ANTLR_INLINE bool InputStream<ImplTraits>::get_isAllocated() const
  108. {
  109. return m_isAllocated;
  110. }
  111. template<class ImplTraits>
  112. ANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_nextChar() const
  113. {
  114. return m_nextChar;
  115. }
  116. template<class ImplTraits>
  117. ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_sizeBuf() const
  118. {
  119. return m_sizeBuf;
  120. }
  121. template<class ImplTraits>
  122. ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_line() const
  123. {
  124. return m_line;
  125. }
  126. template<class ImplTraits>
  127. ANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_currentLine() const
  128. {
  129. return m_currentLine;
  130. }
  131. template<class ImplTraits>
  132. ANTLR_INLINE ANTLR_INT32 InputStream<ImplTraits>::get_charPositionInLine() const
  133. {
  134. return m_charPositionInLine;
  135. }
  136. template<class ImplTraits>
  137. ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_markDepth() const
  138. {
  139. return m_markDepth;
  140. }
  141. template<class ImplTraits>
  142. ANTLR_INLINE typename InputStream<ImplTraits>::MarkersType& InputStream<ImplTraits>::get_markers()
  143. {
  144. return m_markers;
  145. }
  146. template<class ImplTraits>
  147. ANTLR_INLINE const typename InputStream<ImplTraits>::StringType& InputStream<ImplTraits>::get_fileName() const
  148. {
  149. return m_fileName;
  150. }
  151. template<class ImplTraits>
  152. ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_fileNo() const
  153. {
  154. return m_fileNo;
  155. }
  156. template<class ImplTraits>
  157. ANTLR_INLINE ANTLR_UCHAR InputStream<ImplTraits>::get_newlineChar() const
  158. {
  159. return m_newlineChar;
  160. }
  161. template<class ImplTraits>
  162. ANTLR_INLINE ANTLR_UINT8 InputStream<ImplTraits>::get_charByteSize() const
  163. {
  164. return m_charByteSize;
  165. }
  166. template<class ImplTraits>
  167. ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_encoding() const
  168. {
  169. return m_encoding;
  170. }
  171. template<class ImplTraits>
  172. ANTLR_INLINE void InputStream<ImplTraits>::set_data( DataType* data )
  173. {
  174. m_data = data;
  175. }
  176. template<class ImplTraits>
  177. ANTLR_INLINE void InputStream<ImplTraits>::set_isAllocated( bool isAllocated )
  178. {
  179. m_isAllocated = isAllocated;
  180. }
  181. template<class ImplTraits>
  182. ANTLR_INLINE void InputStream<ImplTraits>::set_nextChar( const DataType* nextChar )
  183. {
  184. m_nextChar = nextChar;
  185. }
  186. template<class ImplTraits>
  187. ANTLR_INLINE void InputStream<ImplTraits>::set_sizeBuf( ANTLR_UINT32 sizeBuf )
  188. {
  189. m_sizeBuf = sizeBuf;
  190. }
  191. template<class ImplTraits>
  192. ANTLR_INLINE void InputStream<ImplTraits>::set_line( ANTLR_UINT32 line )
  193. {
  194. m_line = line;
  195. }
  196. template<class ImplTraits>
  197. ANTLR_INLINE void InputStream<ImplTraits>::set_currentLine( const DataType* currentLine )
  198. {
  199. m_currentLine = currentLine;
  200. }
  201. template<class ImplTraits>
  202. ANTLR_INLINE void InputStream<ImplTraits>::set_charPositionInLine( ANTLR_INT32 charPositionInLine )
  203. {
  204. m_charPositionInLine = charPositionInLine;
  205. }
  206. template<class ImplTraits>
  207. ANTLR_INLINE void InputStream<ImplTraits>::set_markDepth( ANTLR_UINT32 markDepth )
  208. {
  209. m_markDepth = markDepth;
  210. }
  211. template<class ImplTraits>
  212. ANTLR_INLINE void InputStream<ImplTraits>::set_markers( const MarkersType& markers )
  213. {
  214. m_markers = markers;
  215. }
  216. template<class ImplTraits>
  217. ANTLR_INLINE void InputStream<ImplTraits>::set_fileName( const StringType& fileName )
  218. {
  219. m_fileName = fileName;
  220. }
  221. template<class ImplTraits>
  222. ANTLR_INLINE void InputStream<ImplTraits>::set_fileNo( ANTLR_UINT32 fileNo )
  223. {
  224. m_fileNo = fileNo;
  225. }
  226. template<class ImplTraits>
  227. ANTLR_INLINE void InputStream<ImplTraits>::set_newlineChar( ANTLR_UCHAR newlineChar )
  228. {
  229. m_newlineChar = newlineChar;
  230. }
  231. template<class ImplTraits>
  232. ANTLR_INLINE void InputStream<ImplTraits>::set_charByteSize( ANTLR_UINT8 charByteSize )
  233. {
  234. m_charByteSize = charByteSize;
  235. }
  236. template<class ImplTraits>
  237. ANTLR_INLINE void InputStream<ImplTraits>::set_encoding( ANTLR_UINT32 encoding )
  238. {
  239. m_encoding = encoding;
  240. }
  241. template<class ImplTraits>
  242. ANTLR_INLINE void InputStream<ImplTraits>::inc_charPositionInLine()
  243. {
  244. ++m_charPositionInLine;
  245. }
  246. template<class ImplTraits>
  247. ANTLR_INLINE void InputStream<ImplTraits>::inc_line()
  248. {
  249. ++m_line;
  250. }
  251. template<class ImplTraits>
  252. ANTLR_INLINE void InputStream<ImplTraits>::inc_markDepth()
  253. {
  254. ++m_markDepth;
  255. }
  256. template<class ImplTraits>
  257. ANTLR_INLINE void InputStream<ImplTraits>::reset()
  258. {
  259. m_nextChar = m_data; /* Input at first character */
  260. m_line = 1; /* starts at line 1 */
  261. m_charPositionInLine = 0;
  262. m_currentLine = m_data;
  263. m_markDepth = 0; /* Reset markers */
  264. /* Clear out up the markers table if it is there
  265. */
  266. m_markers.clear();
  267. }
  268. template<class ImplTraits>
  269. void InputStream<ImplTraits>::reuse(ANTLR_UINT8* inString, ANTLR_UINT32 size, ANTLR_UINT8* name)
  270. {
  271. m_isAllocated = false;
  272. m_data = inString;
  273. m_sizeBuf = size;
  274. // Now we can set up the file name. As we are reusing the stream, there may already
  275. // be a string that we can reuse for holding the filename.
  276. //
  277. if ( BaseType::m_streamName.empty() )
  278. {
  279. BaseType::m_streamName = ((name == NULL) ? "-memory-" : (const char *)name);
  280. m_fileName = BaseType::m_streamName;
  281. }
  282. else
  283. {
  284. BaseType::m_streamName = ((name == NULL) ? "-memory-" : (const char *)name);
  285. }
  286. this->reset();
  287. }
  288. /*
  289. template<class ImplTraits>
  290. typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::LT(ANTLR_INT32 lt)
  291. {
  292. return this->LA(lt);
  293. }
  294. */
  295. template<class ImplTraits>
  296. ANTLR_UINT32 InputStream<ImplTraits>::size()
  297. {
  298. return m_sizeBuf;
  299. }
  300. template<class ImplTraits>
  301. ANTLR_MARKER InputStream<ImplTraits>::index_impl()
  302. {
  303. return (ANTLR_MARKER)m_nextChar;
  304. }
  305. template<class ImplTraits>
  306. typename InputStream<ImplTraits>::StringType InputStream<ImplTraits>::substr(ANTLR_MARKER start, ANTLR_MARKER stop)
  307. {
  308. std::size_t len = static_cast<std::size_t>( (stop-start)/sizeof(DataType) + 1 );
  309. StringType str( (const char*)start, len );
  310. return str;
  311. }
  312. template<class ImplTraits>
  313. ANTLR_UINT32 InputStream<ImplTraits>::get_line()
  314. {
  315. return m_line;
  316. }
  317. template<class ImplTraits>
  318. const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::getLineBuf()
  319. {
  320. return m_currentLine;
  321. }
  322. template<class ImplTraits>
  323. ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_charPositionInLine()
  324. {
  325. return m_charPositionInLine;
  326. }
  327. template<class ImplTraits>
  328. ANTLR_INLINE void InputStream<ImplTraits>::set_charPositionInLine(ANTLR_UINT32 position)
  329. {
  330. m_charPositionInLine = position;
  331. }
  332. template<class ImplTraits>
  333. void InputStream<ImplTraits>::set_newLineChar(ANTLR_UINT32 newlineChar)
  334. {
  335. m_newlineChar = newlineChar;
  336. }
  337. template<class ImplTraits>
  338. ANTLR_INLINE LexState<ImplTraits>::LexState()
  339. {
  340. m_nextChar = NULL;
  341. m_line = 0;
  342. m_currentLine = NULL;
  343. m_charPositionInLine = 0;
  344. }
  345. template<class ImplTraits>
  346. ANTLR_INLINE const typename LexState<ImplTraits>::DataType* LexState<ImplTraits>::get_nextChar() const
  347. {
  348. return m_nextChar;
  349. }
  350. template<class ImplTraits>
  351. ANTLR_INLINE ANTLR_UINT32 LexState<ImplTraits>::get_line() const
  352. {
  353. return m_line;
  354. }
  355. template<class ImplTraits>
  356. ANTLR_INLINE const typename LexState<ImplTraits>::DataType* LexState<ImplTraits>::get_currentLine() const
  357. {
  358. return m_currentLine;
  359. }
  360. template<class ImplTraits>
  361. ANTLR_INLINE ANTLR_INT32 LexState<ImplTraits>::get_charPositionInLine() const
  362. {
  363. return m_charPositionInLine;
  364. }
  365. template<class ImplTraits>
  366. ANTLR_INLINE void LexState<ImplTraits>::set_nextChar( const DataType* nextChar )
  367. {
  368. m_nextChar = nextChar;
  369. }
  370. template<class ImplTraits>
  371. ANTLR_INLINE void LexState<ImplTraits>::set_line( ANTLR_UINT32 line )
  372. {
  373. m_line = line;
  374. }
  375. template<class ImplTraits>
  376. ANTLR_INLINE void LexState<ImplTraits>::set_currentLine( const DataType* currentLine )
  377. {
  378. m_currentLine = currentLine;
  379. }
  380. template<class ImplTraits>
  381. ANTLR_INLINE void LexState<ImplTraits>::set_charPositionInLine( ANTLR_INT32 charPositionInLine )
  382. {
  383. m_charPositionInLine = charPositionInLine;
  384. }
  385. template<class ImplTraits>
  386. ANTLR_INLINE typename InputStream<ImplTraits>::IntStreamType* InputStream<ImplTraits>::get_istream()
  387. {
  388. return this;
  389. }
  390. template<class ImplTraits>
  391. void InputStream<ImplTraits>::setupInputStream()
  392. {
  393. bool isBigEndian;
  394. // Used to determine the endianness of the machine we are currently
  395. // running on.
  396. //
  397. ANTLR_UINT16 bomTest = 0xFEFF;
  398. // What endianess is the machine we are running on? If the incoming
  399. // encoding endianess is the same as this machine's natural byte order
  400. // then we can use more efficient API calls.
  401. //
  402. if (*((ANTLR_UINT8*)(&bomTest)) == 0xFE)
  403. {
  404. isBigEndian = true;
  405. }
  406. else
  407. {
  408. isBigEndian = false;
  409. }
  410. // What encoding did the user tell us {s}he thought it was? I am going
  411. // to get sick of the questions on antlr-interest, I know I am.
  412. //
  413. switch (m_encoding)
  414. {
  415. case ENC_UTF8:
  416. // See if there is a BOM at the start of this UTF-8 sequence
  417. // and just eat it if there is. Windows .TXT files have this for instance
  418. // as it identifies UTF-8 even though it is of no consequence for byte order
  419. // as UTF-8 does not have a byte order.
  420. //
  421. if ( (*(m_nextChar)) == 0xEF
  422. && (*(m_nextChar+1)) == 0xBB
  423. && (*(m_nextChar+2)) == 0xBF
  424. )
  425. {
  426. // The UTF8 BOM is present so skip it
  427. //
  428. m_nextChar += 3;
  429. }
  430. // Install the UTF8 input routines
  431. //
  432. this->setupIntStream( isBigEndian, isBigEndian );
  433. this->set_charByteSize(0);
  434. break;
  435. case ENC_UTF16:
  436. // See if there is a BOM at the start of the input. If not then
  437. // we assume that the byte order is the natural order of this
  438. // machine (or it is really UCS2). If there is a BOM we determine if the encoding
  439. // is the same as the natural order of this machine.
  440. //
  441. if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0xFE
  442. && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0xFF
  443. )
  444. {
  445. // BOM Present, indicates Big Endian
  446. //
  447. m_nextChar += 1;
  448. this->setupIntStream( isBigEndian, true );
  449. }
  450. else if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0xFF
  451. && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0xFE
  452. )
  453. {
  454. // BOM present, indicates Little Endian
  455. //
  456. m_nextChar += 1;
  457. this->setupIntStream( isBigEndian, false );
  458. }
  459. else
  460. {
  461. // No BOM present, assume local computer byte order
  462. //
  463. this->setupIntStream(isBigEndian, isBigEndian);
  464. }
  465. this->set_charByteSize(2);
  466. break;
  467. case ENC_UTF32:
  468. // See if there is a BOM at the start of the input. If not then
  469. // we assume that the byte order is the natural order of this
  470. // machine. If there is we determine if the encoding
  471. // is the same as the natural order of this machine.
  472. //
  473. if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0x00
  474. && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0x00
  475. && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+2)) == 0xFE
  476. && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+3)) == 0xFF
  477. )
  478. {
  479. // BOM Present, indicates Big Endian
  480. //
  481. m_nextChar += 1;
  482. this->setupIntStream(isBigEndian, true);
  483. }
  484. else if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0xFF
  485. && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0xFE
  486. && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0x00
  487. && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0x00
  488. )
  489. {
  490. // BOM present, indicates Little Endian
  491. //
  492. m_nextChar += 1;
  493. this->setupIntStream( isBigEndian, false );
  494. }
  495. else
  496. {
  497. // No BOM present, assume local computer byte order
  498. //
  499. this->setupIntStream( isBigEndian, isBigEndian );
  500. }
  501. this->set_charByteSize(4);
  502. break;
  503. case ENC_UTF16BE:
  504. // Encoding is definately Big Endian with no BOM
  505. //
  506. this->setupIntStream( isBigEndian, true );
  507. this->set_charByteSize(2);
  508. break;
  509. case ENC_UTF16LE:
  510. // Encoding is definately Little Endian with no BOM
  511. //
  512. this->setupIntStream( isBigEndian, false );
  513. this->set_charByteSize(2);
  514. break;
  515. case ENC_UTF32BE:
  516. // Encoding is definately Big Endian with no BOM
  517. //
  518. this->setupIntStream( isBigEndian, true );
  519. this->set_charByteSize(4);
  520. break;
  521. case ENC_UTF32LE:
  522. // Encoding is definately Little Endian with no BOM
  523. //
  524. this->setupIntStream( isBigEndian, false );
  525. this->set_charByteSize(4);
  526. break;
  527. case ENC_EBCDIC:
  528. // EBCDIC is basically the same as ASCII but with an on the
  529. // fly translation to ASCII
  530. //
  531. this->setupIntStream( isBigEndian, isBigEndian );
  532. this->set_charByteSize(1);
  533. break;
  534. case ENC_8BIT:
  535. default:
  536. // Standard 8bit/ASCII
  537. //
  538. this->setupIntStream( isBigEndian, isBigEndian );
  539. this->set_charByteSize(1);
  540. break;
  541. }
  542. }
  543. }