unicode_set_lexer.rl6 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. #include <library/cpp/unicode/set/unicode_set_lexer.h>
  2. #include <util/generic/yexception.h>
  3. namespace NUnicode {
  4. namespace NPrivate {
  // Ragel specification of the `unicode_set_lexer` machine. Its alphabet type
  // is `unsigned short`; the matched text is later handed to YieldToken()
  // through the scanner pointers `ts`/`te`.
  //
  // Named sub-expressions defined below:
  //   id       - a letter followed by any number of letters, digits or '_'.
  //   escape   - '%' or '\', the two characters that open an escape sequence.
  //   category - ':' id ':'. IncorrectCategoryError is attached with `<>^`,
  //              Ragel's local-error embedding for middle states.
  //   xdigitN  - exactly N hexadecimal digits (N = 8, 4, 2).
  //              IncorrectEscapedCodepointError is attached with `@^`,
  //              Ragel's local-error embedding for non-final states.
  //   symbol   - any single character, with IncorrectQuotedPairError attached
  //              through `@^` as well.
  //
  // `main` is a Ragel scanner (`|* ... *|`). Every pattern action leaves the
  // generated code at once by returning the value produced by YieldToken().
  // Per the Ragel manual a scanner prefers the longest match and, on ties,
  // the pattern listed first, so the order of the patterns is significant.
  5. %%{
  6. machine unicode_set_lexer;
  7. alphtype unsigned short;
  8. action IncorrectCategoryError {
  // Error action embedded into `category`.
  9. throw yexception() << "incorrect category";
  10. }
  11. action IncorrectEscapedCodepointError {
  // Error action embedded into xdigit8 / xdigit4 / xdigit2.
  12. throw yexception() << "incorrect escaped codepoint";
  13. }
  14. action IncorrectQuotedPairError {
  // Error action embedded into `symbol`.
  15. throw yexception() << "incorrect quoted pair";
  16. }
  17. id = alpha (alnum | '_')*;
  18. escape = [%\\];
  19. category = (':' id ':') <>^IncorrectCategoryError;
  20. xdigit8 = xdigit{8} @^IncorrectEscapedCodepointError;
  21. xdigit4 = xdigit{4} @^IncorrectEscapedCodepointError;
  22. xdigit2 = xdigit{2} @^IncorrectEscapedCodepointError;
  23. symbol = any @^IncorrectQuotedPairError;
  24. main := |*
  25. '^' => {
  26. return YieldToken(USTT_NEGATION);
  27. };
  28. '-' => {
  29. return YieldToken(USTT_RANGE);
  30. };
  31. '[' => {
  32. return YieldToken(USTT_LBRACKET);
  33. };
  34. ']' => {
  35. return YieldToken(USTT_RBRACKET);
  36. };
  37. category => {
  // The payload is the identifier between the colons: start one past the
  // leading ':' and exclude both delimiters from the length.
  38. return YieldToken(USTT_CATEGORY, ts + 1, te - ts -2);
  39. };
  40. escape 'U' xdigit8 => {
  // ts + 2 skips the escape character and the 'U'; the payload is 8 hex digits.
  41. return YieldToken(USTT_CODEPOINT32, ts + 2, 8);
  42. };
  43. escape 'u' xdigit4 => {
  // ts + 2 skips the escape character and the 'u'; the payload is 4 hex digits.
  44. return YieldToken(USTT_CODEPOINT16, ts + 2, 4);
  45. };
  46. escape 'x' xdigit2 => {
  // ts + 2 skips the escape character and the 'x'; the payload is 2 hex digits.
  47. return YieldToken(USTT_CODEPOINT8, ts + 2, 2);
  48. };
  49. escape symbol => {
  // The token carries the character that follows the escape character.
  50. return YieldToken(USTT_QUOTED_PAIR, *(ts + 1));
  51. };
  52. any => {
  // Listed last: a single character that no earlier pattern claimed is
  // reported as itself.
  53. return YieldToken(USTT_SYMBOL, *ts);
  54. };
  55. *|;
  56. }%%
  57. namespace {
  58. %% write data;
  59. }
  60. TUnicodeSetLexer::TUnicodeSetLexer(const TWtringBuf& data)
  61. : Data(data)
  62. , cs(0)
  63. , act(0)
  64. , ts(NULL)
  65. , te(NULL)
  66. , p(Data.data())
  67. , pe(Data.data() + Data.size())
  68. , eof(pe)
  69. , UseLast(false)
  70. {
  71. %% write init;
  72. }
  73. EUnicodeSetTokenType TUnicodeSetLexer::GetToken() {
  74. if (UseLast) {
  75. UseLast = false;
  76. return LastToken.Type;
  77. }
  78. %% write exec;
  79. return YieldToken(USTT_EOS);
  80. }
  81. EUnicodeSetTokenType TUnicodeSetLexer::YieldToken(EUnicodeSetTokenType type) {
  82. Reset();
  83. LastToken = TUnicodeSetToken(type);
  84. return type;
  85. }
  86. EUnicodeSetTokenType TUnicodeSetLexer::YieldToken(EUnicodeSetTokenType type, wchar16 symbol) {
  87. Reset();
  88. LastToken = TUnicodeSetToken(type, symbol);
  89. return type;
  90. }
  91. EUnicodeSetTokenType TUnicodeSetLexer::YieldToken(EUnicodeSetTokenType type, const wchar16* dataBegin, size_t dataSize) {
  92. Reset();
  93. LastToken = TUnicodeSetToken(type, dataBegin, dataSize);
  94. return type;
  95. }
  96. void TUnicodeSetLexer::Reset() {
  97. p = te;
  98. ts = NULL;
  99. te = NULL;
  100. }
  101. } // NPrivate
  102. } // NUnicode