numparse_affixes.h 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. // © 2018 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. #include "unicode/utypes.h"
  4. #if !UCONFIG_NO_FORMATTING
  5. #ifndef __NUMPARSE_AFFIXES_H__
  6. #define __NUMPARSE_AFFIXES_H__
  7. #include "cmemory.h"
  8. #include "numparse_types.h"
  9. #include "numparse_symbols.h"
  10. #include "numparse_currency.h"
  11. #include "number_affixutils.h"
  12. #include "number_currencysymbols.h"
  13. U_NAMESPACE_BEGIN
  14. namespace numparse {
  15. namespace impl {
  16. // Forward-declaration of implementation classes for friending
  17. class AffixPatternMatcherBuilder;
  18. class AffixPatternMatcher;
  19. using ::icu::number::impl::AffixPatternProvider;
  20. using ::icu::number::impl::TokenConsumer;
  21. using ::icu::number::impl::CurrencySymbols;
  22. class U_I18N_API CodePointMatcher : public NumberParseMatcher, public UMemory {
  23. public:
  24. CodePointMatcher() = default; // WARNING: Leaves the object in an unusable state
  25. CodePointMatcher(UChar32 cp);
  26. bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
  27. bool smokeTest(const StringSegment& segment) const override;
  28. UnicodeString toString() const override;
  29. private:
  30. UChar32 fCp;
  31. };
  32. } // namespace impl
  33. } // namespace numparse
  // Export explicit template instantiations of MaybeStackArray, MemoryPool and CompactUnicodeString.
  35. // When building DLLs for Windows this is required even though no direct access leaks out of the i18n library.
  36. // (See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples.)
  37. // Note: These need to be outside of the numparse::impl namespace, or Clang will generate a compile error.
  38. #if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
  39. template class U_I18N_API MaybeStackArray<numparse::impl::CodePointMatcher*, 8>;
  40. template class U_I18N_API MaybeStackArray<char16_t, 4>;
  41. template class U_I18N_API MemoryPool<numparse::impl::CodePointMatcher, 8>;
  42. template class U_I18N_API numparse::impl::CompactUnicodeString<4>;
  43. #endif
  44. namespace numparse {
  45. namespace impl {
  46. struct AffixTokenMatcherSetupData {
  47. const CurrencySymbols& currencySymbols;
  48. const DecimalFormatSymbols& dfs;
  49. IgnorablesMatcher& ignorables;
  50. const Locale& locale;
  51. parse_flags_t parseFlags;
  52. };
  53. /**
  54. * Small helper class that generates matchers for individual tokens for AffixPatternMatcher.
  55. *
  56. * In Java, this is called AffixTokenMatcherFactory (a "factory"). However, in C++, it is called a
  57. * "warehouse", because in addition to generating the matchers, it also retains ownership of them. The
  58. * warehouse must stay in scope for the whole lifespan of the AffixPatternMatcher that uses matchers from
  59. * the warehouse.
  60. *
  61. * @author sffc
  62. */
  63. // Exported as U_I18N_API for tests
  64. class U_I18N_API AffixTokenMatcherWarehouse : public UMemory {
  65. public:
  66. AffixTokenMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state
  67. AffixTokenMatcherWarehouse(const AffixTokenMatcherSetupData* setupData);
  68. NumberParseMatcher& minusSign();
  69. NumberParseMatcher& plusSign();
  70. NumberParseMatcher& percent();
  71. NumberParseMatcher& permille();
  72. NumberParseMatcher& currency(UErrorCode& status);
  73. IgnorablesMatcher& ignorables();
  74. NumberParseMatcher* nextCodePointMatcher(UChar32 cp, UErrorCode& status);
  75. bool hasEmptyCurrencySymbol() const;
  76. private:
  77. // NOTE: The following field may be unsafe to access after construction is done!
  78. const AffixTokenMatcherSetupData* fSetupData;
  79. // NOTE: These are default-constructed and should not be used until initialized.
  80. MinusSignMatcher fMinusSign;
  81. PlusSignMatcher fPlusSign;
  82. PercentMatcher fPercent;
  83. PermilleMatcher fPermille;
  84. CombinedCurrencyMatcher fCurrency;
  85. // Use a child class for code point matchers, since it requires non-default operators.
  86. MemoryPool<CodePointMatcher> fCodePoints;
  87. friend class AffixPatternMatcherBuilder;
  88. friend class AffixPatternMatcher;
  89. };
  90. class AffixPatternMatcherBuilder : public TokenConsumer, public MutableMatcherCollection {
  91. public:
  92. AffixPatternMatcherBuilder(const UnicodeString& pattern, AffixTokenMatcherWarehouse& warehouse,
  93. IgnorablesMatcher* ignorables);
  94. void consumeToken(::icu::number::impl::AffixPatternType type, UChar32 cp, UErrorCode& status) override;
  95. /** NOTE: You can build only once! */
  96. AffixPatternMatcher build(UErrorCode& status);
  97. private:
  98. ArraySeriesMatcher::MatcherArray fMatchers;
  99. int32_t fMatchersLen;
  100. int32_t fLastTypeOrCp;
  101. const UnicodeString& fPattern;
  102. AffixTokenMatcherWarehouse& fWarehouse;
  103. IgnorablesMatcher* fIgnorables;
  104. void addMatcher(NumberParseMatcher& matcher) override;
  105. };
  106. // Exported as U_I18N_API for tests
  107. class U_I18N_API AffixPatternMatcher : public ArraySeriesMatcher {
  108. public:
  109. AffixPatternMatcher() = default; // WARNING: Leaves the object in an unusable state
  110. static AffixPatternMatcher fromAffixPattern(const UnicodeString& affixPattern,
  111. AffixTokenMatcherWarehouse& warehouse,
  112. parse_flags_t parseFlags, bool* success,
  113. UErrorCode& status);
  114. UnicodeString getPattern() const;
  115. bool operator==(const AffixPatternMatcher& other) const;
  116. private:
  117. CompactUnicodeString<4> fPattern;
  118. AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern,
  119. UErrorCode& status);
  120. friend class AffixPatternMatcherBuilder;
  121. };
  122. class AffixMatcher : public NumberParseMatcher, public UMemory {
  123. public:
  124. AffixMatcher() = default; // WARNING: Leaves the object in an unusable state
  125. AffixMatcher(AffixPatternMatcher* prefix, AffixPatternMatcher* suffix, result_flags_t flags);
  126. bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
  127. void postProcess(ParsedNumber& result) const override;
  128. bool smokeTest(const StringSegment& segment) const override;
  129. int8_t compareTo(const AffixMatcher& rhs) const;
  130. UnicodeString toString() const override;
  131. private:
  132. AffixPatternMatcher* fPrefix;
  133. AffixPatternMatcher* fSuffix;
  134. result_flags_t fFlags;
  135. };
  136. /**
  137. * A C++-only class to retain ownership of the AffixMatchers needed for parsing.
  138. */
  139. class AffixMatcherWarehouse {
  140. public:
  141. AffixMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state
  142. AffixMatcherWarehouse(AffixTokenMatcherWarehouse* tokenWarehouse);
  143. void createAffixMatchers(const AffixPatternProvider& patternInfo, MutableMatcherCollection& output,
  144. const IgnorablesMatcher& ignorables, parse_flags_t parseFlags,
  145. UErrorCode& status);
  146. private:
  147. // 18 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix,
  148. // and doubled since there may be an empty currency symbol
  149. AffixMatcher fAffixMatchers[18];
  150. // 6 is the limit: positive, zero, and negative, a prefix and a suffix for each,
  151. // and doubled since there may be an empty currency symbol
  152. AffixPatternMatcher fAffixPatternMatchers[12];
  153. // Reference to the warehouse for tokens used by the AffixPatternMatchers
  154. AffixTokenMatcherWarehouse* fTokenWarehouse;
  155. friend class AffixMatcher;
  156. static bool isInteresting(const AffixPatternProvider& patternInfo, const IgnorablesMatcher& ignorables,
  157. parse_flags_t parseFlags, UErrorCode& status);
  158. };
  159. } // namespace impl
  160. } // namespace numparse
  161. U_NAMESPACE_END
  162. #endif //__NUMPARSE_AFFIXES_H__
  163. #endif /* #if !UCONFIG_NO_FORMATTING */