static_unicode_sets.h 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. // © 2018 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. // This file contains utilities to deal with static-allocated UnicodeSets.
  4. //
  5. // Common use case: you write a "private static final" UnicodeSet in Java, and
  6. // want something similarly easy in C++. Originally written for number
  7. // parsing, but this header can be used for other applications.
  8. //
  9. // Main entrypoint: `unisets::get(unisets::MY_SET_ID_HERE)`
  10. //
  11. // This file is in common instead of i18n because it is needed by ucurr.cpp.
  12. //
  13. // Author: sffc
  14. #include "unicode/utypes.h"
  15. #if !UCONFIG_NO_FORMATTING
  16. #ifndef __STATIC_UNICODE_SETS_H__
  17. #define __STATIC_UNICODE_SETS_H__
  18. #include "unicode/uniset.h"
  19. #include "unicode/unistr.h"
  20. U_NAMESPACE_BEGIN
  21. namespace unisets {
  22. enum Key {
  23. // NONE is used to indicate null in chooseFrom().
  24. // EMPTY is used to get an empty UnicodeSet.
  25. NONE = -1,
  26. EMPTY = 0,
  27. // Ignorables
  28. DEFAULT_IGNORABLES,
  29. STRICT_IGNORABLES,
  30. // Separators
  31. // Notes:
  32. // - COMMA is a superset of STRICT_COMMA
  33. // - PERIOD is a superset of SCRICT_PERIOD
  34. // - ALL_SEPARATORS is the union of COMMA, PERIOD, and OTHER_GROUPING_SEPARATORS
  35. // - STRICT_ALL_SEPARATORS is the union of STRICT_COMMA, STRICT_PERIOD, and OTHER_GRP_SEPARATORS
  36. COMMA,
  37. PERIOD,
  38. STRICT_COMMA,
  39. STRICT_PERIOD,
  40. APOSTROPHE_SIGN,
  41. OTHER_GROUPING_SEPARATORS,
  42. ALL_SEPARATORS,
  43. STRICT_ALL_SEPARATORS,
  44. // Symbols
  45. MINUS_SIGN,
  46. PLUS_SIGN,
  47. PERCENT_SIGN,
  48. PERMILLE_SIGN,
  49. INFINITY_SIGN,
  50. // Currency Symbols
  51. DOLLAR_SIGN,
  52. POUND_SIGN,
  53. RUPEE_SIGN,
  54. YEN_SIGN,
  55. WON_SIGN,
  56. // Other
  57. DIGITS,
  58. // Combined Separators with Digits (for lead code points)
  59. DIGITS_OR_ALL_SEPARATORS,
  60. DIGITS_OR_STRICT_ALL_SEPARATORS,
  61. // The number of elements in the enum.
  62. UNISETS_KEY_COUNT
  63. };
  64. /**
  65. * Gets the static-allocated UnicodeSet according to the provided key. The
  66. * pointer will be deleted during u_cleanup(); the caller should NOT delete it.
  67. *
  68. * Exported as U_COMMON_API for ucurr.cpp
  69. *
  70. * This method is always safe and OK to chain: in the case of a memory or other
  71. * error, it returns an empty set from static memory.
  72. *
  73. * Example:
  74. *
  75. * UBool hasIgnorables = unisets::get(unisets::DEFAULT_IGNORABLES)->contains(...);
  76. *
  77. * @param key The desired UnicodeSet according to the enum in this file.
  78. * @return The requested UnicodeSet. Guaranteed to be frozen and non-null, but
  79. * may be empty if an error occurred during data loading.
  80. */
  81. U_COMMON_API const UnicodeSet* get(Key key);
  82. /**
  83. * Checks if the UnicodeSet given by key1 contains the given string.
  84. *
  85. * Exported as U_COMMON_API for numparse_decimal.cpp
  86. *
  87. * @param str The string to check.
  88. * @param key1 The set to check.
  89. * @return key1 if the set contains str, or NONE if not.
  90. */
  91. U_COMMON_API Key chooseFrom(UnicodeString str, Key key1);
  92. /**
  93. * Checks if the UnicodeSet given by either key1 or key2 contains the string.
  94. *
  95. * Exported as U_COMMON_API for numparse_decimal.cpp
  96. *
  97. * @param str The string to check.
  98. * @param key1 The first set to check.
  99. * @param key2 The second set to check.
  100. * @return key1 if that set contains str; key2 if that set contains str; or
  101. * NONE if neither set contains str.
  102. */
  103. U_COMMON_API Key chooseFrom(UnicodeString str, Key key1, Key key2);
  104. // TODO: Load these from data: ICU-20108
  105. // Unused in C++:
  106. // Key chooseCurrency(UnicodeString str);
  107. // Used instead:
  108. static const struct {
  109. Key key;
  110. UChar32 exemplar;
  111. } kCurrencyEntries[] = {
  112. {DOLLAR_SIGN, u'$'},
  113. {POUND_SIGN, u'£'},
  114. {RUPEE_SIGN, u'₹'},
  115. {YEN_SIGN, u'¥'},
  116. {WON_SIGN, u'₩'},
  117. };
  118. } // namespace unisets
  119. U_NAMESPACE_END
  120. #endif //__STATIC_UNICODE_SETS_H__
  121. #endif /* #if !UCONFIG_NO_FORMATTING */