numrange_impl.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
  1. // © 2018 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. #include "unicode/utypes.h"
  4. #if !UCONFIG_NO_FORMATTING
  5. // Allow implicit conversion from char16_t* to UnicodeString for this file:
  6. // Helpful in toString methods and elsewhere.
  7. #define UNISTR_FROM_STRING_EXPLICIT
  8. #include "unicode/numberrangeformatter.h"
  9. #include "numrange_impl.h"
  10. #include "patternprops.h"
  11. #include "pluralranges.h"
  12. #include "uresimp.h"
  13. #include "util.h"
  14. using namespace icu;
  15. using namespace icu::number;
  16. using namespace icu::number::impl;
  17. namespace {
  18. // Helper function for 2-dimensional switch statement
  19. constexpr int8_t identity2d(UNumberRangeIdentityFallback a, UNumberRangeIdentityResult b) {
  20. return static_cast<int8_t>(a) | (static_cast<int8_t>(b) << 4);
  21. }
  22. struct NumberRangeData {
  23. SimpleFormatter rangePattern;
  24. // Note: approximatelyPattern is unused since ICU 69.
  25. // SimpleFormatter approximatelyPattern;
  26. };
  27. class NumberRangeDataSink : public ResourceSink {
  28. public:
  29. NumberRangeDataSink(NumberRangeData& data) : fData(data) {}
  30. void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) override {
  31. ResourceTable miscTable = value.getTable(status);
  32. if (U_FAILURE(status)) { return; }
  33. for (int i = 0; miscTable.getKeyAndValue(i, key, value); i++) {
  34. if (uprv_strcmp(key, "range") == 0) {
  35. if (hasRangeData()) {
  36. continue; // have already seen this pattern
  37. }
  38. fData.rangePattern = {value.getUnicodeString(status), status};
  39. }
  40. /*
  41. // Note: approximatelyPattern is unused since ICU 69.
  42. else if (uprv_strcmp(key, "approximately") == 0) {
  43. if (hasApproxData()) {
  44. continue; // have already seen this pattern
  45. }
  46. fData.approximatelyPattern = {value.getUnicodeString(status), status};
  47. }
  48. */
  49. }
  50. }
  51. bool hasRangeData() {
  52. return fData.rangePattern.getArgumentLimit() != 0;
  53. }
  54. /*
  55. // Note: approximatelyPattern is unused since ICU 69.
  56. bool hasApproxData() {
  57. return fData.approximatelyPattern.getArgumentLimit() != 0;
  58. }
  59. */
  60. bool isComplete() {
  61. return hasRangeData() /* && hasApproxData() */;
  62. }
  63. void fillInDefaults(UErrorCode& status) {
  64. if (!hasRangeData()) {
  65. fData.rangePattern = {u"{0}–{1}", status};
  66. }
  67. /*
  68. if (!hasApproxData()) {
  69. fData.approximatelyPattern = {u"~{0}", status};
  70. }
  71. */
  72. }
  73. private:
  74. NumberRangeData& fData;
  75. };
  76. void getNumberRangeData(const char* localeName, const char* nsName, NumberRangeData& data, UErrorCode& status) {
  77. if (U_FAILURE(status)) { return; }
  78. LocalUResourceBundlePointer rb(ures_open(nullptr, localeName, &status));
  79. if (U_FAILURE(status)) { return; }
  80. NumberRangeDataSink sink(data);
  81. CharString dataPath;
  82. dataPath.append("NumberElements/", -1, status);
  83. dataPath.append(nsName, -1, status);
  84. dataPath.append("/miscPatterns", -1, status);
  85. if (U_FAILURE(status)) { return; }
  86. UErrorCode localStatus = U_ZERO_ERROR;
  87. ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, localStatus);
  88. if (U_FAILURE(localStatus) && localStatus != U_MISSING_RESOURCE_ERROR) {
  89. status = localStatus;
  90. return;
  91. }
  92. // Fall back to latn if necessary
  93. if (!sink.isComplete()) {
  94. ures_getAllItemsWithFallback(rb.getAlias(), "NumberElements/latn/miscPatterns", sink, status);
  95. }
  96. sink.fillInDefaults(status);
  97. }
  98. } // namespace
  99. NumberRangeFormatterImpl::NumberRangeFormatterImpl(const RangeMacroProps& macros, UErrorCode& status)
  100. : formatterImpl1(macros.formatter1.fMacros, status),
  101. formatterImpl2(macros.formatter2.fMacros, status),
  102. fSameFormatters(macros.singleFormatter),
  103. fCollapse(macros.collapse),
  104. fIdentityFallback(macros.identityFallback),
  105. fApproximatelyFormatter(status) {
  106. const char* nsName = formatterImpl1.getRawMicroProps().nsName;
  107. if (!fSameFormatters && uprv_strcmp(nsName, formatterImpl2.getRawMicroProps().nsName) != 0) {
  108. status = U_ILLEGAL_ARGUMENT_ERROR;
  109. return;
  110. }
  111. NumberRangeData data;
  112. getNumberRangeData(macros.locale.getName(), nsName, data, status);
  113. if (U_FAILURE(status)) { return; }
  114. fRangeFormatter = data.rangePattern;
  115. if (fSameFormatters && (
  116. fIdentityFallback == UNUM_IDENTITY_FALLBACK_APPROXIMATELY ||
  117. fIdentityFallback == UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE)) {
  118. MacroProps approximatelyMacros(macros.formatter1.fMacros);
  119. approximatelyMacros.approximately = true;
  120. // Use in-place construction because NumberFormatterImpl has internal self-pointers
  121. fApproximatelyFormatter.~NumberFormatterImpl();
  122. new (&fApproximatelyFormatter) NumberFormatterImpl(approximatelyMacros, status);
  123. }
  124. // TODO: Get locale from PluralRules instead?
  125. fPluralRanges = StandardPluralRanges::forLocale(macros.locale, status);
  126. if (U_FAILURE(status)) { return; }
  127. }
  128. void NumberRangeFormatterImpl::format(UFormattedNumberRangeData& data, bool equalBeforeRounding, UErrorCode& status) const {
  129. if (U_FAILURE(status)) {
  130. return;
  131. }
  132. MicroProps micros1;
  133. MicroProps micros2;
  134. formatterImpl1.preProcess(data.quantity1, micros1, status);
  135. if (fSameFormatters) {
  136. formatterImpl1.preProcess(data.quantity2, micros2, status);
  137. } else {
  138. formatterImpl2.preProcess(data.quantity2, micros2, status);
  139. }
  140. if (U_FAILURE(status)) {
  141. return;
  142. }
  143. // If any of the affixes are different, an identity is not possible
  144. // and we must use formatRange().
  145. // TODO: Write this as MicroProps operator==() ?
  146. // TODO: Avoid the redundancy of these equality operations with the
  147. // ones in formatRange?
  148. if (!micros1.modInner->semanticallyEquivalent(*micros2.modInner)
  149. || !micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle)
  150. || !micros1.modOuter->semanticallyEquivalent(*micros2.modOuter)) {
  151. formatRange(data, micros1, micros2, status);
  152. data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL;
  153. return;
  154. }
  155. // Check for identity
  156. if (equalBeforeRounding) {
  157. data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING;
  158. } else if (data.quantity1 == data.quantity2) {
  159. data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING;
  160. } else {
  161. data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL;
  162. }
  163. switch (identity2d(fIdentityFallback, data.identityResult)) {
  164. case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
  165. UNUM_IDENTITY_RESULT_NOT_EQUAL):
  166. case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
  167. UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
  168. case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
  169. UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
  170. case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
  171. UNUM_IDENTITY_RESULT_NOT_EQUAL):
  172. case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
  173. UNUM_IDENTITY_RESULT_NOT_EQUAL):
  174. case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
  175. UNUM_IDENTITY_RESULT_NOT_EQUAL):
  176. formatRange(data, micros1, micros2, status);
  177. break;
  178. case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
  179. UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
  180. case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
  181. UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
  182. case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
  183. UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
  184. formatApproximately(data, micros1, micros2, status);
  185. break;
  186. case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
  187. UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
  188. case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
  189. UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
  190. case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
  191. UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
  192. formatSingleValue(data, micros1, micros2, status);
  193. break;
  194. default:
  195. UPRV_UNREACHABLE_EXIT;
  196. }
  197. }
  198. void NumberRangeFormatterImpl::formatSingleValue(UFormattedNumberRangeData& data,
  199. MicroProps& micros1, MicroProps& micros2,
  200. UErrorCode& status) const {
  201. if (U_FAILURE(status)) { return; }
  202. if (fSameFormatters) {
  203. int32_t length = NumberFormatterImpl::writeNumber(micros1.simple, data.quantity1, data.getStringRef(), 0, status);
  204. NumberFormatterImpl::writeAffixes(micros1, data.getStringRef(), 0, length, status);
  205. } else {
  206. formatRange(data, micros1, micros2, status);
  207. }
  208. }
  209. void NumberRangeFormatterImpl::formatApproximately (UFormattedNumberRangeData& data,
  210. MicroProps& micros1, MicroProps& micros2,
  211. UErrorCode& status) const {
  212. if (U_FAILURE(status)) { return; }
  213. if (fSameFormatters) {
  214. // Re-format using the approximately formatter:
  215. MicroProps microsAppx;
  216. data.quantity1.resetExponent();
  217. fApproximatelyFormatter.preProcess(data.quantity1, microsAppx, status);
  218. int32_t length = NumberFormatterImpl::writeNumber(microsAppx.simple, data.quantity1, data.getStringRef(), 0, status);
  219. length += microsAppx.modInner->apply(data.getStringRef(), 0, length, status);
  220. length += microsAppx.modMiddle->apply(data.getStringRef(), 0, length, status);
  221. microsAppx.modOuter->apply(data.getStringRef(), 0, length, status);
  222. } else {
  223. formatRange(data, micros1, micros2, status);
  224. }
  225. }
  226. void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data,
  227. MicroProps& micros1, MicroProps& micros2,
  228. UErrorCode& status) const {
  229. if (U_FAILURE(status)) { return; }
  230. // modInner is always notation (scientific); collapsable in ALL.
  231. // modOuter is always units; collapsable in ALL, AUTO, and UNIT.
  232. // modMiddle could be either; collapsable in ALL and sometimes AUTO and UNIT.
  233. // Never collapse an outer mod but not an inner mod.
  234. bool collapseOuter, collapseMiddle, collapseInner;
  235. switch (fCollapse) {
  236. case UNUM_RANGE_COLLAPSE_ALL:
  237. case UNUM_RANGE_COLLAPSE_AUTO:
  238. case UNUM_RANGE_COLLAPSE_UNIT:
  239. {
  240. // OUTER MODIFIER
  241. collapseOuter = micros1.modOuter->semanticallyEquivalent(*micros2.modOuter);
  242. if (!collapseOuter) {
  243. // Never collapse inner mods if outer mods are not collapsable
  244. collapseMiddle = false;
  245. collapseInner = false;
  246. break;
  247. }
  248. // MIDDLE MODIFIER
  249. collapseMiddle = micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle);
  250. if (!collapseMiddle) {
  251. // Never collapse inner mods if outer mods are not collapsable
  252. collapseInner = false;
  253. break;
  254. }
  255. // MIDDLE MODIFIER HEURISTICS
  256. // (could disable collapsing of the middle modifier)
  257. // The modifiers are equal by this point, so we can look at just one of them.
  258. const Modifier* mm = micros1.modMiddle;
  259. if (fCollapse == UNUM_RANGE_COLLAPSE_UNIT) {
  260. // Only collapse if the modifier is a unit.
  261. // TODO: Make a better way to check for a unit?
  262. // TODO: Handle case where the modifier has both notation and unit (compact currency)?
  263. if (!mm->containsField({UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD})
  264. && !mm->containsField({UFIELD_CATEGORY_NUMBER, UNUM_PERCENT_FIELD})) {
  265. collapseMiddle = false;
  266. }
  267. } else if (fCollapse == UNUM_RANGE_COLLAPSE_AUTO) {
  268. // Heuristic as of ICU 63: collapse only if the modifier is more than one code point.
  269. if (mm->getCodePointCount() <= 1) {
  270. collapseMiddle = false;
  271. }
  272. }
  273. if (!collapseMiddle || fCollapse != UNUM_RANGE_COLLAPSE_ALL) {
  274. collapseInner = false;
  275. break;
  276. }
  277. // INNER MODIFIER
  278. collapseInner = micros1.modInner->semanticallyEquivalent(*micros2.modInner);
  279. // All done checking for collapsibility.
  280. break;
  281. }
  282. default:
  283. collapseOuter = false;
  284. collapseMiddle = false;
  285. collapseInner = false;
  286. break;
  287. }
  288. FormattedStringBuilder& string = data.getStringRef();
  289. int32_t lengthPrefix = 0;
  290. int32_t length1 = 0;
  291. int32_t lengthInfix = 0;
  292. int32_t length2 = 0;
  293. int32_t lengthSuffix = 0;
  294. // Use #define so that these are evaluated at the call site.
  295. #define UPRV_INDEX_0 (lengthPrefix)
  296. #define UPRV_INDEX_1 (lengthPrefix + length1)
  297. #define UPRV_INDEX_2 (lengthPrefix + length1 + lengthInfix)
  298. #define UPRV_INDEX_3 (lengthPrefix + length1 + lengthInfix + length2)
  299. #define UPRV_INDEX_4 (lengthPrefix + length1 + lengthInfix + length2 + lengthSuffix)
  300. int32_t lengthRange = SimpleModifier::formatTwoArgPattern(
  301. fRangeFormatter,
  302. string,
  303. 0,
  304. &lengthPrefix,
  305. &lengthSuffix,
  306. kUndefinedField,
  307. status);
  308. if (U_FAILURE(status)) { return; }
  309. lengthInfix = lengthRange - lengthPrefix - lengthSuffix;
  310. U_ASSERT(lengthInfix > 0);
  311. // SPACING HEURISTIC
  312. // Add spacing unless all modifiers are collapsed.
  313. // TODO: add API to control this?
  314. // TODO: Use a data-driven heuristic like currency spacing?
  315. // TODO: Use Unicode [:whitespace:] instead of PatternProps whitespace? (consider speed implications)
  316. {
  317. bool repeatInner = !collapseInner && micros1.modInner->getCodePointCount() > 0;
  318. bool repeatMiddle = !collapseMiddle && micros1.modMiddle->getCodePointCount() > 0;
  319. bool repeatOuter = !collapseOuter && micros1.modOuter->getCodePointCount() > 0;
  320. if (repeatInner || repeatMiddle || repeatOuter) {
  321. // Add spacing if there is not already spacing
  322. if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_1))) {
  323. lengthInfix += string.insertCodePoint(UPRV_INDEX_1, u'\u0020', kUndefinedField, status);
  324. }
  325. if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_2 - 1))) {
  326. lengthInfix += string.insertCodePoint(UPRV_INDEX_2, u'\u0020', kUndefinedField, status);
  327. }
  328. }
  329. }
  330. length1 += NumberFormatterImpl::writeNumber(micros1.simple, data.quantity1, string, UPRV_INDEX_0, status);
  331. // ICU-21684: Write the second number to a temp string to avoid repeated insert operations
  332. FormattedStringBuilder tempString;
  333. NumberFormatterImpl::writeNumber(micros2.simple, data.quantity2, tempString, 0, status);
  334. length2 += string.insert(UPRV_INDEX_2, tempString, status);
  335. // TODO: Support padding?
  336. if (collapseInner) {
  337. const Modifier& mod = resolveModifierPlurals(*micros1.modInner, *micros2.modInner);
  338. lengthSuffix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_4, status);
  339. lengthPrefix += mod.getPrefixLength();
  340. lengthSuffix -= mod.getPrefixLength();
  341. } else {
  342. length1 += micros1.modInner->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
  343. length2 += micros2.modInner->apply(string, UPRV_INDEX_2, UPRV_INDEX_4, status);
  344. }
  345. if (collapseMiddle) {
  346. const Modifier& mod = resolveModifierPlurals(*micros1.modMiddle, *micros2.modMiddle);
  347. lengthSuffix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_4, status);
  348. lengthPrefix += mod.getPrefixLength();
  349. lengthSuffix -= mod.getPrefixLength();
  350. } else {
  351. length1 += micros1.modMiddle->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
  352. length2 += micros2.modMiddle->apply(string, UPRV_INDEX_2, UPRV_INDEX_4, status);
  353. }
  354. if (collapseOuter) {
  355. const Modifier& mod = resolveModifierPlurals(*micros1.modOuter, *micros2.modOuter);
  356. lengthSuffix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_4, status);
  357. lengthPrefix += mod.getPrefixLength();
  358. lengthSuffix -= mod.getPrefixLength();
  359. } else {
  360. length1 += micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
  361. length2 += micros2.modOuter->apply(string, UPRV_INDEX_2, UPRV_INDEX_4, status);
  362. }
  363. // Now that all pieces are added, save the span info.
  364. data.appendSpanInfo(UFIELD_CATEGORY_NUMBER_RANGE_SPAN, 0, UPRV_INDEX_0, length1, status);
  365. data.appendSpanInfo(UFIELD_CATEGORY_NUMBER_RANGE_SPAN, 1, UPRV_INDEX_2, length2, status);
  366. }
  367. const Modifier&
  368. NumberRangeFormatterImpl::resolveModifierPlurals(const Modifier& first, const Modifier& second) const {
  369. Modifier::Parameters parameters;
  370. first.getParameters(parameters);
  371. if (parameters.obj == nullptr) {
  372. // No plural form; return a fallback (e.g., the first)
  373. return first;
  374. }
  375. StandardPlural::Form firstPlural = parameters.plural;
  376. second.getParameters(parameters);
  377. if (parameters.obj == nullptr) {
  378. // No plural form; return a fallback (e.g., the first)
  379. return first;
  380. }
  381. StandardPlural::Form secondPlural = parameters.plural;
  382. // Get the required plural form from data
  383. StandardPlural::Form resultPlural = fPluralRanges.resolve(firstPlural, secondPlural);
  384. // Get and return the new Modifier
  385. const Modifier* mod = parameters.obj->getModifier(parameters.signum, resultPlural);
  386. U_ASSERT(mod != nullptr);
  387. return *mod;
  388. }
  389. #endif /* #if !UCONFIG_NO_FORMATTING */