number_modifiers.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. // © 2017 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. #include "unicode/utypes.h"
  4. #if !UCONFIG_NO_FORMATTING
  5. #include "umutex.h"
  6. #include "ucln_cmn.h"
  7. #include "ucln_in.h"
  8. #include "number_modifiers.h"
  9. using namespace icu;
  10. using namespace icu::number;
  11. using namespace icu::number::impl;
  12. namespace {
  13. // TODO: This is copied from simpleformatter.cpp
  14. const int32_t ARG_NUM_LIMIT = 0x100;
  15. // These are the default currency spacing UnicodeSets in CLDR.
  16. // Pre-compute them for performance.
  17. // The Java unit test testCurrencySpacingPatternStability() will start failing if these change in CLDR.
  18. icu::UInitOnce gDefaultCurrencySpacingInitOnce {};
  19. UnicodeSet *UNISET_DIGIT = nullptr;
  20. UnicodeSet *UNISET_NOTSZ = nullptr;
  21. UBool U_CALLCONV cleanupDefaultCurrencySpacing() {
  22. delete UNISET_DIGIT;
  23. UNISET_DIGIT = nullptr;
  24. delete UNISET_NOTSZ;
  25. UNISET_NOTSZ = nullptr;
  26. gDefaultCurrencySpacingInitOnce.reset();
  27. return true;
  28. }
  29. void U_CALLCONV initDefaultCurrencySpacing(UErrorCode &status) {
  30. ucln_i18n_registerCleanup(UCLN_I18N_CURRENCY_SPACING, cleanupDefaultCurrencySpacing);
  31. UNISET_DIGIT = new UnicodeSet(UnicodeString(u"[:digit:]"), status);
  32. UNISET_NOTSZ = new UnicodeSet(UnicodeString(u"[[:^S:]&[:^Z:]]"), status);
  33. if (UNISET_DIGIT == nullptr || UNISET_NOTSZ == nullptr) {
  34. status = U_MEMORY_ALLOCATION_ERROR;
  35. return;
  36. }
  37. UNISET_DIGIT->freeze();
  38. UNISET_NOTSZ->freeze();
  39. }
  40. } // namespace
  41. Modifier::~Modifier() = default;
  42. Modifier::Parameters::Parameters()
  43. : obj(nullptr) {}
  44. Modifier::Parameters::Parameters(
  45. const ModifierStore* _obj, Signum _signum, StandardPlural::Form _plural)
  46. : obj(_obj), signum(_signum), plural(_plural) {}
  47. ModifierStore::~ModifierStore() = default;
  48. AdoptingSignumModifierStore::~AdoptingSignumModifierStore() {
  49. for (const Modifier *mod : mods) {
  50. delete mod;
  51. }
  52. }
  53. AdoptingSignumModifierStore&
  54. AdoptingSignumModifierStore::operator=(AdoptingSignumModifierStore&& other) noexcept {
  55. for (size_t i=0; i<SIGNUM_COUNT; i++) {
  56. this->mods[i] = other.mods[i];
  57. other.mods[i] = nullptr;
  58. }
  59. return *this;
  60. }
  61. int32_t ConstantAffixModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
  62. UErrorCode &status) const {
  63. // Insert the suffix first since inserting the prefix will change the rightIndex
  64. int length = output.insert(rightIndex, fSuffix, fField, status);
  65. length += output.insert(leftIndex, fPrefix, fField, status);
  66. return length;
  67. }
  68. int32_t ConstantAffixModifier::getPrefixLength() const {
  69. return fPrefix.length();
  70. }
  71. int32_t ConstantAffixModifier::getCodePointCount() const {
  72. return fPrefix.countChar32() + fSuffix.countChar32();
  73. }
  74. bool ConstantAffixModifier::isStrong() const {
  75. return fStrong;
  76. }
  77. bool ConstantAffixModifier::containsField(Field field) const {
  78. (void)field;
  79. // This method is not currently used.
  80. UPRV_UNREACHABLE_EXIT;
  81. }
  82. void ConstantAffixModifier::getParameters(Parameters& output) const {
  83. (void)output;
  84. // This method is not currently used.
  85. UPRV_UNREACHABLE_EXIT;
  86. }
  87. bool ConstantAffixModifier::semanticallyEquivalent(const Modifier& other) const {
  88. auto* _other = dynamic_cast<const ConstantAffixModifier*>(&other);
  89. if (_other == nullptr) {
  90. return false;
  91. }
  92. return fPrefix == _other->fPrefix
  93. && fSuffix == _other->fSuffix
  94. && fField == _other->fField
  95. && fStrong == _other->fStrong;
  96. }
  97. SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong)
  98. : SimpleModifier(simpleFormatter, field, strong, {}) {}
  99. SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong,
  100. const Modifier::Parameters parameters)
  101. : fCompiledPattern(simpleFormatter.compiledPattern), fField(field), fStrong(strong),
  102. fParameters(parameters) {
  103. int32_t argLimit = SimpleFormatter::getArgumentLimit(
  104. fCompiledPattern.getBuffer(), fCompiledPattern.length());
  105. if (argLimit == 0) {
  106. // No arguments in compiled pattern
  107. fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT;
  108. U_ASSERT(2 + fPrefixLength == fCompiledPattern.length());
  109. // Set suffixOffset = -1 to indicate no arguments in compiled pattern.
  110. fSuffixOffset = -1;
  111. fSuffixLength = 0;
  112. } else {
  113. U_ASSERT(argLimit == 1);
  114. if (fCompiledPattern.charAt(1) != 0) {
  115. // Found prefix
  116. fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT;
  117. fSuffixOffset = 3 + fPrefixLength;
  118. } else {
  119. // No prefix
  120. fPrefixLength = 0;
  121. fSuffixOffset = 2;
  122. }
  123. if (3 + fPrefixLength < fCompiledPattern.length()) {
  124. // Found suffix
  125. fSuffixLength = fCompiledPattern.charAt(fSuffixOffset) - ARG_NUM_LIMIT;
  126. } else {
  127. // No suffix
  128. fSuffixLength = 0;
  129. }
  130. }
  131. }
  132. SimpleModifier::SimpleModifier()
  133. : fField(kUndefinedField), fStrong(false), fPrefixLength(0), fSuffixLength(0) {
  134. }
  135. int32_t SimpleModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
  136. UErrorCode &status) const {
  137. return formatAsPrefixSuffix(output, leftIndex, rightIndex, status);
  138. }
  139. int32_t SimpleModifier::getPrefixLength() const {
  140. return fPrefixLength;
  141. }
  142. int32_t SimpleModifier::getCodePointCount() const {
  143. int32_t count = 0;
  144. if (fPrefixLength > 0) {
  145. count += fCompiledPattern.countChar32(2, fPrefixLength);
  146. }
  147. if (fSuffixLength > 0) {
  148. count += fCompiledPattern.countChar32(1 + fSuffixOffset, fSuffixLength);
  149. }
  150. return count;
  151. }
  152. bool SimpleModifier::isStrong() const {
  153. return fStrong;
  154. }
  155. bool SimpleModifier::containsField(Field field) const {
  156. (void)field;
  157. // This method is not currently used.
  158. UPRV_UNREACHABLE_EXIT;
  159. }
  160. void SimpleModifier::getParameters(Parameters& output) const {
  161. output = fParameters;
  162. }
  163. bool SimpleModifier::semanticallyEquivalent(const Modifier& other) const {
  164. auto* _other = dynamic_cast<const SimpleModifier*>(&other);
  165. if (_other == nullptr) {
  166. return false;
  167. }
  168. if (fParameters.obj != nullptr) {
  169. return fParameters.obj == _other->fParameters.obj;
  170. }
  171. return fCompiledPattern == _other->fCompiledPattern
  172. && fField == _other->fField
  173. && fStrong == _other->fStrong;
  174. }
  175. int32_t
  176. SimpleModifier::formatAsPrefixSuffix(FormattedStringBuilder &result, int32_t startIndex, int32_t endIndex,
  177. UErrorCode &status) const {
  178. if (fSuffixOffset == -1 && fPrefixLength + fSuffixLength > 0) {
  179. // There is no argument for the inner number; overwrite the entire segment with our string.
  180. return result.splice(startIndex, endIndex, fCompiledPattern, 2, 2 + fPrefixLength, fField, status);
  181. } else {
  182. if (fPrefixLength > 0) {
  183. result.insert(startIndex, fCompiledPattern, 2, 2 + fPrefixLength, fField, status);
  184. }
  185. if (fSuffixLength > 0) {
  186. result.insert(
  187. endIndex + fPrefixLength,
  188. fCompiledPattern,
  189. 1 + fSuffixOffset,
  190. 1 + fSuffixOffset + fSuffixLength,
  191. fField,
  192. status);
  193. }
  194. return fPrefixLength + fSuffixLength;
  195. }
  196. }
  197. int32_t
  198. SimpleModifier::formatTwoArgPattern(const SimpleFormatter& compiled, FormattedStringBuilder& result,
  199. int32_t index, int32_t* outPrefixLength, int32_t* outSuffixLength,
  200. Field field, UErrorCode& status) {
  201. const UnicodeString& compiledPattern = compiled.compiledPattern;
  202. int32_t argLimit = SimpleFormatter::getArgumentLimit(
  203. compiledPattern.getBuffer(), compiledPattern.length());
  204. if (argLimit != 2) {
  205. status = U_INTERNAL_PROGRAM_ERROR;
  206. return 0;
  207. }
  208. int32_t offset = 1; // offset into compiledPattern
  209. int32_t length = 0; // chars added to result
  210. int32_t prefixLength = compiledPattern.charAt(offset);
  211. offset++;
  212. if (prefixLength < ARG_NUM_LIMIT) {
  213. // No prefix
  214. prefixLength = 0;
  215. } else {
  216. prefixLength -= ARG_NUM_LIMIT;
  217. result.insert(index + length, compiledPattern, offset, offset + prefixLength, field, status);
  218. offset += prefixLength;
  219. length += prefixLength;
  220. offset++;
  221. }
  222. int32_t infixLength = compiledPattern.charAt(offset);
  223. offset++;
  224. if (infixLength < ARG_NUM_LIMIT) {
  225. // No infix
  226. infixLength = 0;
  227. } else {
  228. infixLength -= ARG_NUM_LIMIT;
  229. result.insert(index + length, compiledPattern, offset, offset + infixLength, field, status);
  230. offset += infixLength;
  231. length += infixLength;
  232. offset++;
  233. }
  234. int32_t suffixLength;
  235. if (offset == compiledPattern.length()) {
  236. // No suffix
  237. suffixLength = 0;
  238. } else {
  239. suffixLength = compiledPattern.charAt(offset) - ARG_NUM_LIMIT;
  240. offset++;
  241. result.insert(index + length, compiledPattern, offset, offset + suffixLength, field, status);
  242. length += suffixLength;
  243. }
  244. *outPrefixLength = prefixLength;
  245. *outSuffixLength = suffixLength;
  246. return length;
  247. }
  248. int32_t ConstantMultiFieldModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
  249. UErrorCode &status) const {
  250. int32_t length = output.insert(leftIndex, fPrefix, status);
  251. if (fOverwrite) {
  252. length += output.splice(
  253. leftIndex + length,
  254. rightIndex + length,
  255. UnicodeString(), 0, 0,
  256. kUndefinedField, status);
  257. }
  258. length += output.insert(rightIndex + length, fSuffix, status);
  259. return length;
  260. }
  261. int32_t ConstantMultiFieldModifier::getPrefixLength() const {
  262. return fPrefix.length();
  263. }
  264. int32_t ConstantMultiFieldModifier::getCodePointCount() const {
  265. return fPrefix.codePointCount() + fSuffix.codePointCount();
  266. }
  267. bool ConstantMultiFieldModifier::isStrong() const {
  268. return fStrong;
  269. }
  270. bool ConstantMultiFieldModifier::containsField(Field field) const {
  271. return fPrefix.containsField(field) || fSuffix.containsField(field);
  272. }
  273. void ConstantMultiFieldModifier::getParameters(Parameters& output) const {
  274. output = fParameters;
  275. }
  276. bool ConstantMultiFieldModifier::semanticallyEquivalent(const Modifier& other) const {
  277. auto* _other = dynamic_cast<const ConstantMultiFieldModifier*>(&other);
  278. if (_other == nullptr) {
  279. return false;
  280. }
  281. if (fParameters.obj != nullptr) {
  282. return fParameters.obj == _other->fParameters.obj;
  283. }
  284. return fPrefix.contentEquals(_other->fPrefix)
  285. && fSuffix.contentEquals(_other->fSuffix)
  286. && fOverwrite == _other->fOverwrite
  287. && fStrong == _other->fStrong;
  288. }
  289. CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const FormattedStringBuilder &prefix,
  290. const FormattedStringBuilder &suffix,
  291. bool overwrite,
  292. bool strong,
  293. const DecimalFormatSymbols &symbols,
  294. UErrorCode &status)
  295. : ConstantMultiFieldModifier(prefix, suffix, overwrite, strong) {
  296. // Check for currency spacing. Do not build the UnicodeSets unless there is
  297. // a currency code point at a boundary.
  298. if (prefix.length() > 0 && prefix.fieldAt(prefix.length() - 1) == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
  299. int prefixCp = prefix.getLastCodePoint();
  300. UnicodeSet prefixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, PREFIX, status);
  301. if (prefixUnicodeSet.contains(prefixCp)) {
  302. fAfterPrefixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, PREFIX, status);
  303. fAfterPrefixUnicodeSet.freeze();
  304. fAfterPrefixInsert = getInsertString(symbols, PREFIX, status);
  305. } else {
  306. fAfterPrefixUnicodeSet.setToBogus();
  307. fAfterPrefixInsert.setToBogus();
  308. }
  309. } else {
  310. fAfterPrefixUnicodeSet.setToBogus();
  311. fAfterPrefixInsert.setToBogus();
  312. }
  313. if (suffix.length() > 0 && suffix.fieldAt(0) == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
  314. int suffixCp = suffix.getFirstCodePoint();
  315. UnicodeSet suffixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, SUFFIX, status);
  316. if (suffixUnicodeSet.contains(suffixCp)) {
  317. fBeforeSuffixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, SUFFIX, status);
  318. fBeforeSuffixUnicodeSet.freeze();
  319. fBeforeSuffixInsert = getInsertString(symbols, SUFFIX, status);
  320. } else {
  321. fBeforeSuffixUnicodeSet.setToBogus();
  322. fBeforeSuffixInsert.setToBogus();
  323. }
  324. } else {
  325. fBeforeSuffixUnicodeSet.setToBogus();
  326. fBeforeSuffixInsert.setToBogus();
  327. }
  328. }
  329. int32_t CurrencySpacingEnabledModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
  330. UErrorCode &status) const {
  331. // Currency spacing logic
  332. int length = 0;
  333. if (rightIndex - leftIndex > 0 && !fAfterPrefixUnicodeSet.isBogus() &&
  334. fAfterPrefixUnicodeSet.contains(output.codePointAt(leftIndex))) {
  335. // TODO: Should we use the CURRENCY field here?
  336. length += output.insert(
  337. leftIndex,
  338. fAfterPrefixInsert,
  339. kUndefinedField,
  340. status);
  341. }
  342. if (rightIndex - leftIndex > 0 && !fBeforeSuffixUnicodeSet.isBogus() &&
  343. fBeforeSuffixUnicodeSet.contains(output.codePointBefore(rightIndex))) {
  344. // TODO: Should we use the CURRENCY field here?
  345. length += output.insert(
  346. rightIndex + length,
  347. fBeforeSuffixInsert,
  348. kUndefinedField,
  349. status);
  350. }
  351. // Call super for the remaining logic
  352. length += ConstantMultiFieldModifier::apply(output, leftIndex, rightIndex + length, status);
  353. return length;
  354. }
  355. int32_t
  356. CurrencySpacingEnabledModifier::applyCurrencySpacing(FormattedStringBuilder &output, int32_t prefixStart,
  357. int32_t prefixLen, int32_t suffixStart,
  358. int32_t suffixLen,
  359. const DecimalFormatSymbols &symbols,
  360. UErrorCode &status) {
  361. int length = 0;
  362. bool hasPrefix = (prefixLen > 0);
  363. bool hasSuffix = (suffixLen > 0);
  364. bool hasNumber = (suffixStart - prefixStart - prefixLen > 0); // could be empty string
  365. if (hasPrefix && hasNumber) {
  366. length += applyCurrencySpacingAffix(output, prefixStart + prefixLen, PREFIX, symbols, status);
  367. }
  368. if (hasSuffix && hasNumber) {
  369. length += applyCurrencySpacingAffix(output, suffixStart + length, SUFFIX, symbols, status);
  370. }
  371. return length;
  372. }
  373. int32_t
  374. CurrencySpacingEnabledModifier::applyCurrencySpacingAffix(FormattedStringBuilder &output, int32_t index,
  375. EAffix affix,
  376. const DecimalFormatSymbols &symbols,
  377. UErrorCode &status) {
  378. // NOTE: For prefix, output.fieldAt(index-1) gets the last field type in the prefix.
  379. // This works even if the last code point in the prefix is 2 code units because the
  380. // field value gets populated to both indices in the field array.
  381. Field affixField = (affix == PREFIX) ? output.fieldAt(index - 1) : output.fieldAt(index);
  382. if (affixField != Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
  383. return 0;
  384. }
  385. int affixCp = (affix == PREFIX) ? output.codePointBefore(index) : output.codePointAt(index);
  386. UnicodeSet affixUniset = getUnicodeSet(symbols, IN_CURRENCY, affix, status);
  387. if (!affixUniset.contains(affixCp)) {
  388. return 0;
  389. }
  390. int numberCp = (affix == PREFIX) ? output.codePointAt(index) : output.codePointBefore(index);
  391. UnicodeSet numberUniset = getUnicodeSet(symbols, IN_NUMBER, affix, status);
  392. if (!numberUniset.contains(numberCp)) {
  393. return 0;
  394. }
  395. UnicodeString spacingString = getInsertString(symbols, affix, status);
  396. // NOTE: This next line *inserts* the spacing string, triggering an arraycopy.
  397. // It would be more efficient if this could be done before affixes were attached,
  398. // so that it could be prepended/appended instead of inserted.
  399. // However, the build code path is more efficient, and this is the most natural
  400. // place to put currency spacing in the non-build code path.
  401. // TODO: Should we use the CURRENCY field here?
  402. return output.insert(index, spacingString, kUndefinedField, status);
  403. }
  404. UnicodeSet
  405. CurrencySpacingEnabledModifier::getUnicodeSet(const DecimalFormatSymbols &symbols, EPosition position,
  406. EAffix affix, UErrorCode &status) {
  407. // Ensure the static defaults are initialized:
  408. umtx_initOnce(gDefaultCurrencySpacingInitOnce, &initDefaultCurrencySpacing, status);
  409. if (U_FAILURE(status)) {
  410. return UnicodeSet();
  411. }
  412. const UnicodeString& pattern = symbols.getPatternForCurrencySpacing(
  413. position == IN_CURRENCY ? UNUM_CURRENCY_MATCH : UNUM_CURRENCY_SURROUNDING_MATCH,
  414. affix == SUFFIX,
  415. status);
  416. if (pattern.compare(u"[:digit:]", -1) == 0) {
  417. return *UNISET_DIGIT;
  418. } else if (pattern.compare(u"[[:^S:]&[:^Z:]]", -1) == 0) {
  419. return *UNISET_NOTSZ;
  420. } else {
  421. return UnicodeSet(pattern, status);
  422. }
  423. }
  424. UnicodeString
  425. CurrencySpacingEnabledModifier::getInsertString(const DecimalFormatSymbols &symbols, EAffix affix,
  426. UErrorCode &status) {
  427. return symbols.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, affix == SUFFIX, status);
  428. }
  429. #endif /* #if !UCONFIG_NO_FORMATTING */