ruleiter.cpp 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. **********************************************************************
  5. * Copyright (c) 2003-2011, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. **********************************************************************
  8. * Author: Alan Liu
  9. * Created: September 24 2003
  10. * Since: ICU 2.8
  11. **********************************************************************
  12. */
  13. #include "ruleiter.h"
  14. #include "unicode/parsepos.h"
  15. #include "unicode/symtable.h"
  16. #include "unicode/unistr.h"
  17. #include "unicode/utf16.h"
  18. #include "patternprops.h"
  /*
   * Maximum number of UTF-16 code units handed to the escape decoder when a
   * backslash is encountered. The two longest notations are
   * \U87654321 (10 code units) and a surrogate pair written as
   * \ud800\udc00 (12 code units), hence 12.
   *
   * Declared as a typed constant rather than a preprocessor macro so that it
   * has a real type and obeys normal scoping rules.
   */
  static constexpr int32_t MAX_U_NOTATION_LEN = 12;
  21. U_NAMESPACE_BEGIN
  22. RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
  23. ParsePosition& thePos) :
  24. text(theText),
  25. pos(thePos),
  26. sym(theSym),
  27. buf(nullptr),
  28. bufPos(0)
  29. {}
  30. UBool RuleCharacterIterator::atEnd() const {
  31. return buf == nullptr && pos.getIndex() == text.length();
  32. }
  33. UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
  34. if (U_FAILURE(ec)) return DONE;
  35. UChar32 c = DONE;
  36. isEscaped = false;
  37. for (;;) {
  38. c = _current();
  39. _advance(U16_LENGTH(c));
  40. if (c == SymbolTable::SYMBOL_REF && buf == nullptr &&
  41. (options & PARSE_VARIABLES) != 0 && sym != nullptr) {
  42. UnicodeString name = sym->parseReference(text, pos, text.length());
  43. // If name is empty there was an isolated SYMBOL_REF;
  44. // return it. Caller must be prepared for this.
  45. if (name.length() == 0) {
  46. break;
  47. }
  48. bufPos = 0;
  49. buf = sym->lookup(name);
  50. if (buf == nullptr) {
  51. ec = U_UNDEFINED_VARIABLE;
  52. return DONE;
  53. }
  54. // Handle empty variable value
  55. if (buf->length() == 0) {
  56. buf = nullptr;
  57. }
  58. continue;
  59. }
  60. if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) {
  61. continue;
  62. }
  63. if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) {
  64. UnicodeString tempEscape;
  65. int32_t offset = 0;
  66. c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset);
  67. jumpahead(offset);
  68. isEscaped = true;
  69. if (c < 0) {
  70. ec = U_MALFORMED_UNICODE_ESCAPE;
  71. return DONE;
  72. }
  73. }
  74. break;
  75. }
  76. return c;
  77. }
  78. void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
  79. p.buf = buf;
  80. p.pos = pos.getIndex();
  81. p.bufPos = bufPos;
  82. }
  83. void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
  84. buf = p.buf;
  85. pos.setIndex(p.pos);
  86. bufPos = p.bufPos;
  87. }
  88. void RuleCharacterIterator::skipIgnored(int32_t options) {
  89. if ((options & SKIP_WHITESPACE) != 0) {
  90. for (;;) {
  91. UChar32 a = _current();
  92. if (!PatternProps::isWhiteSpace(a)) break;
  93. _advance(U16_LENGTH(a));
  94. }
  95. }
  96. }
  97. UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
  98. if (maxLookAhead < 0) {
  99. maxLookAhead = 0x7FFFFFFF;
  100. }
  101. if (buf != nullptr) {
  102. buf->extract(bufPos, maxLookAhead, result);
  103. } else {
  104. text.extract(pos.getIndex(), maxLookAhead, result);
  105. }
  106. return result;
  107. }
  108. void RuleCharacterIterator::jumpahead(int32_t count) {
  109. _advance(count);
  110. }
  111. /*
  112. UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
  113. int32_t b = pos.getIndex();
  114. text.extract(0, b, result);
  115. return result.append((char16_t) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
  116. }
  117. */
  118. UChar32 RuleCharacterIterator::_current() const {
  119. if (buf != nullptr) {
  120. return buf->char32At(bufPos);
  121. } else {
  122. int i = pos.getIndex();
  123. return (i < text.length()) ? text.char32At(i) : static_cast<UChar32>(DONE);
  124. }
  125. }
  126. void RuleCharacterIterator::_advance(int32_t count) {
  127. if (buf != nullptr) {
  128. bufPos += count;
  129. if (bufPos == buf->length()) {
  130. buf = nullptr;
  131. }
  132. } else {
  133. pos.setIndex(pos.getIndex() + count);
  134. if (pos.getIndex() > text.length()) {
  135. pos.setIndex(text.length());
  136. }
  137. }
  138. }
  139. U_NAMESPACE_END
  140. //eof