123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163 |
- // © 2018 and later: Unicode, Inc. and others.
- // License & terms of use: http://www.unicode.org/copyright.html
- #include "unicode/utypes.h"
- #if !UCONFIG_NO_FORMATTING
- // Allow implicit conversion from char16_t* to UnicodeString for this file:
- // Helpful in toString methods and elsewhere.
- #define UNISTR_FROM_STRING_EXPLICIT
- #include "numparse_types.h"
- #include "numparse_scientific.h"
- #include "static_unicode_sets.h"
- #include "string_segment.h"
- using namespace icu;
- using namespace icu::numparse;
- using namespace icu::numparse::impl;
- namespace {
- inline const UnicodeSet& minusSignSet() {
- return *unisets::get(unisets::MINUS_SIGN);
- }
- inline const UnicodeSet& plusSignSet() {
- return *unisets::get(unisets::PLUS_SIGN);
- }
- } // namespace
- ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper)
- : fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)),
- fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED),
- fIgnorablesMatcher(PARSE_FLAG_STRICT_IGNORABLES) {
- const UnicodeString& minusSign = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
- if (minusSignSet().contains(minusSign)) {
- fCustomMinusSign.setToBogus();
- } else {
- fCustomMinusSign = minusSign;
- }
- const UnicodeString& plusSign = dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
- if (plusSignSet().contains(plusSign)) {
- fCustomPlusSign.setToBogus();
- } else {
- fCustomPlusSign = plusSign;
- }
- }
- bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
- // Only accept scientific notation after the mantissa.
- if (!result.seenNumber()) {
- return false;
- }
- // Only accept one exponent per string.
- if (0 != (result.flags & FLAG_HAS_EXPONENT)) {
- return false;
- }
- // First match the scientific separator, and then match another number after it.
- // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
- int32_t initialOffset = segment.getOffset();
- int32_t overlap = segment.getCommonPrefixLength(fExponentSeparatorString);
- if (overlap == fExponentSeparatorString.length()) {
- // Full exponent separator match.
- // First attempt to get a code point, returning true if we can't get one.
- if (segment.length() == overlap) {
- return true;
- }
- segment.adjustOffset(overlap);
- // Allow ignorables before the sign.
- // Note: call site is guarded by the segment.length() check above.
- // Note: the ignorables matcher should not touch the result.
- fIgnorablesMatcher.match(segment, result, status);
- if (segment.length() == 0) {
- segment.setOffset(initialOffset);
- return true;
- }
- // Allow a sign, and then try to match digits.
- int8_t exponentSign = 1;
- if (segment.startsWith(minusSignSet())) {
- exponentSign = -1;
- segment.adjustOffsetByCodePoint();
- } else if (segment.startsWith(plusSignSet())) {
- segment.adjustOffsetByCodePoint();
- } else if (segment.startsWith(fCustomMinusSign)) {
- overlap = segment.getCommonPrefixLength(fCustomMinusSign);
- if (overlap != fCustomMinusSign.length()) {
- // Partial custom sign match
- segment.setOffset(initialOffset);
- return true;
- }
- exponentSign = -1;
- segment.adjustOffset(overlap);
- } else if (segment.startsWith(fCustomPlusSign)) {
- overlap = segment.getCommonPrefixLength(fCustomPlusSign);
- if (overlap != fCustomPlusSign.length()) {
- // Partial custom sign match
- segment.setOffset(initialOffset);
- return true;
- }
- segment.adjustOffset(overlap);
- }
- // Return true if the segment is empty.
- if (segment.length() == 0) {
- segment.setOffset(initialOffset);
- return true;
- }
- // Allow ignorables after the sign.
- // Note: call site is guarded by the segment.length() check above.
- // Note: the ignorables matcher should not touch the result.
- fIgnorablesMatcher.match(segment, result, status);
- if (segment.length() == 0) {
- segment.setOffset(initialOffset);
- return true;
- }
- // We are supposed to accept E0 after NaN, so we need to make sure result.quantity is available.
- bool wasBogus = result.quantity.bogus;
- result.quantity.bogus = false;
- int digitsOffset = segment.getOffset();
- bool digitsReturnValue = fExponentMatcher.match(segment, result, exponentSign, status);
- result.quantity.bogus = wasBogus;
- if (segment.getOffset() != digitsOffset) {
- // At least one exponent digit was matched.
- result.flags |= FLAG_HAS_EXPONENT;
- } else {
- // No exponent digits were matched
- segment.setOffset(initialOffset);
- }
- return digitsReturnValue;
- } else if (overlap == segment.length()) {
- // Partial exponent separator match
- return true;
- }
- // No match
- return false;
- }
- bool ScientificMatcher::smokeTest(const StringSegment& segment) const {
- return segment.startsWith(fExponentSeparatorString);
- }
- UnicodeString ScientificMatcher::toString() const {
- return u"<Scientific>";
- }
- #endif /* #if !UCONFIG_NO_FORMATTING */
|