numparse_scientific.cpp 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. // © 2018 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. #include "unicode/utypes.h"
  4. #if !UCONFIG_NO_FORMATTING
  5. // Allow implicit conversion from char16_t* to UnicodeString for this file:
  6. // Helpful in toString methods and elsewhere.
  7. #define UNISTR_FROM_STRING_EXPLICIT
  8. #include "numparse_types.h"
  9. #include "numparse_scientific.h"
  10. #include "static_unicode_sets.h"
  11. #include "string_segment.h"
  12. using namespace icu;
  13. using namespace icu::numparse;
  14. using namespace icu::numparse::impl;
  15. namespace {
  16. inline const UnicodeSet& minusSignSet() {
  17. return *unisets::get(unisets::MINUS_SIGN);
  18. }
  19. inline const UnicodeSet& plusSignSet() {
  20. return *unisets::get(unisets::PLUS_SIGN);
  21. }
  22. } // namespace
  23. ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper)
  24. : fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)),
  25. fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED),
  26. fIgnorablesMatcher(PARSE_FLAG_STRICT_IGNORABLES) {
  27. const UnicodeString& minusSign = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
  28. if (minusSignSet().contains(minusSign)) {
  29. fCustomMinusSign.setToBogus();
  30. } else {
  31. fCustomMinusSign = minusSign;
  32. }
  33. const UnicodeString& plusSign = dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
  34. if (plusSignSet().contains(plusSign)) {
  35. fCustomPlusSign.setToBogus();
  36. } else {
  37. fCustomPlusSign = plusSign;
  38. }
  39. }
  40. bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
  41. // Only accept scientific notation after the mantissa.
  42. if (!result.seenNumber()) {
  43. return false;
  44. }
  45. // Only accept one exponent per string.
  46. if (0 != (result.flags & FLAG_HAS_EXPONENT)) {
  47. return false;
  48. }
  49. // First match the scientific separator, and then match another number after it.
  50. // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
  51. int32_t initialOffset = segment.getOffset();
  52. int32_t overlap = segment.getCommonPrefixLength(fExponentSeparatorString);
  53. if (overlap == fExponentSeparatorString.length()) {
  54. // Full exponent separator match.
  55. // First attempt to get a code point, returning true if we can't get one.
  56. if (segment.length() == overlap) {
  57. return true;
  58. }
  59. segment.adjustOffset(overlap);
  60. // Allow ignorables before the sign.
  61. // Note: call site is guarded by the segment.length() check above.
  62. // Note: the ignorables matcher should not touch the result.
  63. fIgnorablesMatcher.match(segment, result, status);
  64. if (segment.length() == 0) {
  65. segment.setOffset(initialOffset);
  66. return true;
  67. }
  68. // Allow a sign, and then try to match digits.
  69. int8_t exponentSign = 1;
  70. if (segment.startsWith(minusSignSet())) {
  71. exponentSign = -1;
  72. segment.adjustOffsetByCodePoint();
  73. } else if (segment.startsWith(plusSignSet())) {
  74. segment.adjustOffsetByCodePoint();
  75. } else if (segment.startsWith(fCustomMinusSign)) {
  76. overlap = segment.getCommonPrefixLength(fCustomMinusSign);
  77. if (overlap != fCustomMinusSign.length()) {
  78. // Partial custom sign match
  79. segment.setOffset(initialOffset);
  80. return true;
  81. }
  82. exponentSign = -1;
  83. segment.adjustOffset(overlap);
  84. } else if (segment.startsWith(fCustomPlusSign)) {
  85. overlap = segment.getCommonPrefixLength(fCustomPlusSign);
  86. if (overlap != fCustomPlusSign.length()) {
  87. // Partial custom sign match
  88. segment.setOffset(initialOffset);
  89. return true;
  90. }
  91. segment.adjustOffset(overlap);
  92. }
  93. // Return true if the segment is empty.
  94. if (segment.length() == 0) {
  95. segment.setOffset(initialOffset);
  96. return true;
  97. }
  98. // Allow ignorables after the sign.
  99. // Note: call site is guarded by the segment.length() check above.
  100. // Note: the ignorables matcher should not touch the result.
  101. fIgnorablesMatcher.match(segment, result, status);
  102. if (segment.length() == 0) {
  103. segment.setOffset(initialOffset);
  104. return true;
  105. }
  106. // We are supposed to accept E0 after NaN, so we need to make sure result.quantity is available.
  107. bool wasBogus = result.quantity.bogus;
  108. result.quantity.bogus = false;
  109. int digitsOffset = segment.getOffset();
  110. bool digitsReturnValue = fExponentMatcher.match(segment, result, exponentSign, status);
  111. result.quantity.bogus = wasBogus;
  112. if (segment.getOffset() != digitsOffset) {
  113. // At least one exponent digit was matched.
  114. result.flags |= FLAG_HAS_EXPONENT;
  115. } else {
  116. // No exponent digits were matched
  117. segment.setOffset(initialOffset);
  118. }
  119. return digitsReturnValue;
  120. } else if (overlap == segment.length()) {
  121. // Partial exponent separator match
  122. return true;
  123. }
  124. // No match
  125. return false;
  126. }
  127. bool ScientificMatcher::smokeTest(const StringSegment& segment) const {
  128. return segment.startsWith(fExponentSeparatorString);
  129. }
  130. UnicodeString ScientificMatcher::toString() const {
  131. return u"<Scientific>";
  132. }
  133. #endif /* #if !UCONFIG_NO_FORMATTING */