123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162 |
- // © 2016 and later: Unicode, Inc. and others.
- // License & terms of use: http://www.unicode.org/copyright.html
- /*
- **********************************************************************
- * Copyright (c) 2003-2011, International Business Machines
- * Corporation and others. All Rights Reserved.
- **********************************************************************
- * Author: Alan Liu
- * Created: September 24 2003
- * Since: ICU 2.8
- **********************************************************************
- */
- #include "ruleiter.h"
- #include "unicode/parsepos.h"
- #include "unicode/symtable.h"
- #include "unicode/unistr.h"
- #include "unicode/utf16.h"
- #include "patternprops.h"
/*
 * Maximum number of UTF-16 code units that next() asks lookahead() for when
 * it decodes a backslash escape. The two longest notations this bound is
 * sized for are:
 *     \U87654321      (10 characters)
 *     \ud800\udc00    (12 characters, a surrogate pair written as two escapes)
 *
 * Declared as a typed, file-local constant rather than a preprocessor macro
 * so that it obeys normal C++ scoping and type checking.
 */
static constexpr int32_t MAX_U_NOTATION_LEN = 12;
- U_NAMESPACE_BEGIN
- RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
- ParsePosition& thePos) :
- text(theText),
- pos(thePos),
- sym(theSym),
- buf(nullptr),
- bufPos(0)
- {}
- UBool RuleCharacterIterator::atEnd() const {
- return buf == nullptr && pos.getIndex() == text.length();
- }
- UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
- if (U_FAILURE(ec)) return DONE;
- UChar32 c = DONE;
- isEscaped = false;
- for (;;) {
- c = _current();
- _advance(U16_LENGTH(c));
- if (c == SymbolTable::SYMBOL_REF && buf == nullptr &&
- (options & PARSE_VARIABLES) != 0 && sym != nullptr) {
- UnicodeString name = sym->parseReference(text, pos, text.length());
- // If name is empty there was an isolated SYMBOL_REF;
- // return it. Caller must be prepared for this.
- if (name.length() == 0) {
- break;
- }
- bufPos = 0;
- buf = sym->lookup(name);
- if (buf == nullptr) {
- ec = U_UNDEFINED_VARIABLE;
- return DONE;
- }
- // Handle empty variable value
- if (buf->length() == 0) {
- buf = nullptr;
- }
- continue;
- }
- if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) {
- continue;
- }
- if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) {
- UnicodeString tempEscape;
- int32_t offset = 0;
- c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset);
- jumpahead(offset);
- isEscaped = true;
- if (c < 0) {
- ec = U_MALFORMED_UNICODE_ESCAPE;
- return DONE;
- }
- }
- break;
- }
- return c;
- }
- void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
- p.buf = buf;
- p.pos = pos.getIndex();
- p.bufPos = bufPos;
- }
- void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
- buf = p.buf;
- pos.setIndex(p.pos);
- bufPos = p.bufPos;
- }
- void RuleCharacterIterator::skipIgnored(int32_t options) {
- if ((options & SKIP_WHITESPACE) != 0) {
- for (;;) {
- UChar32 a = _current();
- if (!PatternProps::isWhiteSpace(a)) break;
- _advance(U16_LENGTH(a));
- }
- }
- }
- UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
- if (maxLookAhead < 0) {
- maxLookAhead = 0x7FFFFFFF;
- }
- if (buf != nullptr) {
- buf->extract(bufPos, maxLookAhead, result);
- } else {
- text.extract(pos.getIndex(), maxLookAhead, result);
- }
- return result;
- }
- void RuleCharacterIterator::jumpahead(int32_t count) {
- _advance(count);
- }
- /*
- UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
- int32_t b = pos.getIndex();
- text.extract(0, b, result);
- return result.append((char16_t) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
- }
- */
- UChar32 RuleCharacterIterator::_current() const {
- if (buf != nullptr) {
- return buf->char32At(bufPos);
- } else {
- int i = pos.getIndex();
- return (i < text.length()) ? text.char32At(i) : static_cast<UChar32>(DONE);
- }
- }
- void RuleCharacterIterator::_advance(int32_t count) {
- if (buf != nullptr) {
- bufPos += count;
- if (bufPos == buf->length()) {
- buf = nullptr;
- }
- } else {
- pos.setIndex(pos.getIndex() + count);
- if (pos.getIndex() > text.length()) {
- pos.setIndex(text.length());
- }
- }
- }
- U_NAMESPACE_END
- //eof
|