nfrs.cpp 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. ******************************************************************************
  5. * Copyright (C) 1997-2015, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. ******************************************************************************
  8. * file name: nfrs.cpp
  9. * encoding: UTF-8
  10. * tab size: 8 (not used)
  11. * indentation:4
  12. *
  13. * Modification history
  14. * Date Name Comments
  15. * 10/11/2001 Doug Ported from ICU4J
  16. */
  17. #include "nfrs.h"
  18. #if U_HAVE_RBNF
  19. #include "unicode/uchar.h"
  20. #include "nfrule.h"
  21. #include "nfrlist.h"
  22. #include "patternprops.h"
  23. #include "putilimp.h"
  24. #ifdef RBNF_DEBUG
  25. #include "cmemory.h"
  26. #endif
  27. enum {
  28. /** -x */
  29. NEGATIVE_RULE_INDEX = 0,
  30. /** x.x */
  31. IMPROPER_FRACTION_RULE_INDEX = 1,
  32. /** 0.x */
  33. PROPER_FRACTION_RULE_INDEX = 2,
  34. /** x.0 */
  35. DEFAULT_RULE_INDEX = 3,
  36. /** Inf */
  37. INFINITY_RULE_INDEX = 4,
  38. /** NaN */
  39. NAN_RULE_INDEX = 5,
  40. NON_NUMERICAL_RULE_LENGTH = 6
  41. };
  42. U_NAMESPACE_BEGIN
  43. #if 0
  44. // euclid's algorithm works with doubles
  45. // note, doubles only get us up to one quadrillion or so, which
  46. // isn't as much range as we get with longs. We probably still
  47. // want either 64-bit math, or BigInteger.
  48. static int64_t
  49. util_lcm(int64_t x, int64_t y)
  50. {
  51. x.abs();
  52. y.abs();
  53. if (x == 0 || y == 0) {
  54. return 0;
  55. } else {
  56. do {
  57. if (x < y) {
  58. int64_t t = x; x = y; y = t;
  59. }
  60. x -= y * (x/y);
  61. } while (x != 0);
  62. return y;
  63. }
  64. }
  65. #else
  66. /**
  67. * Calculates the least common multiple of x and y.
  68. */
  69. static int64_t
  70. util_lcm(int64_t x, int64_t y)
  71. {
  72. // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
  73. // vol. 2, 1st ed., pp. 298-299
  74. int64_t x1 = x;
  75. int64_t y1 = y;
  76. int p2 = 0;
  77. while ((x1 & 1) == 0 && (y1 & 1) == 0) {
  78. ++p2;
  79. x1 >>= 1;
  80. y1 >>= 1;
  81. }
  82. int64_t t;
  83. if ((x1 & 1) == 1) {
  84. t = -y1;
  85. } else {
  86. t = x1;
  87. }
  88. while (t != 0) {
  89. while ((t & 1) == 0) {
  90. t = t >> 1;
  91. }
  92. if (t > 0) {
  93. x1 = t;
  94. } else {
  95. y1 = -t;
  96. }
  97. t = x1 - y1;
  98. }
  99. int64_t gcd = x1 << p2;
  100. // x * y == gcd(x, y) * lcm(x, y)
  101. return x / gcd * y;
  102. }
  103. #endif
  104. static const char16_t gPercent = 0x0025;
  105. static const char16_t gColon = 0x003a;
  106. static const char16_t gSemicolon = 0x003b;
  107. static const char16_t gLineFeed = 0x000a;
  108. static const char16_t gPercentPercent[] =
  109. {
  110. 0x25, 0x25, 0
  111. }; /* "%%" */
  112. static const char16_t gNoparse[] =
  113. {
  114. 0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
  115. }; /* "@noparse" */
  116. NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status)
  117. : name()
  118. , rules(0)
  119. , owner(_owner)
  120. , fractionRules()
  121. , fIsFractionRuleSet(false)
  122. , fIsPublic(false)
  123. , fIsParseable(true)
  124. {
  125. for (int32_t i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
  126. nonNumericalRules[i] = nullptr;
  127. }
  128. if (U_FAILURE(status)) {
  129. return;
  130. }
  131. UnicodeString& description = descriptions[index]; // !!! make sure index is valid
  132. if (description.length() == 0) {
  133. // throw new IllegalArgumentException("Empty rule set description");
  134. status = U_PARSE_ERROR;
  135. return;
  136. }
  137. // if the description begins with a rule set name (the rule set
  138. // name can be omitted in formatter descriptions that consist
  139. // of only one rule set), copy it out into our "name" member
  140. // and delete it from the description
  141. if (description.charAt(0) == gPercent) {
  142. int32_t pos = description.indexOf(gColon);
  143. if (pos == -1) {
  144. // throw new IllegalArgumentException("Rule set name doesn't end in colon");
  145. status = U_PARSE_ERROR;
  146. } else {
  147. name.setTo(description, 0, pos);
  148. while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
  149. }
  150. description.remove(0, pos);
  151. }
  152. } else {
  153. name.setTo(UNICODE_STRING_SIMPLE("%default"));
  154. }
  155. if (description.length() == 0) {
  156. // throw new IllegalArgumentException("Empty rule set description");
  157. status = U_PARSE_ERROR;
  158. }
  159. fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
  160. if ( name.endsWith(gNoparse,8) ) {
  161. fIsParseable = false;
  162. name.truncate(name.length()-8); // remove the @noparse from the name
  163. }
  164. // all of the other members of NFRuleSet are initialized
  165. // by parseRules()
  166. }
  167. void
  168. NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status)
  169. {
  170. // start by creating a Vector whose elements are Strings containing
  171. // the descriptions of the rules (one rule per element). The rules
  172. // are separated by semicolons (there's no escape facility: ALL
  173. // semicolons are rule delimiters)
  174. if (U_FAILURE(status)) {
  175. return;
  176. }
  177. // ensure we are starting with an empty rule list
  178. rules.deleteAll();
  179. // dlf - the original code kept a separate description array for no reason,
  180. // so I got rid of it. The loop was too complex so I simplified it.
  181. UnicodeString currentDescription;
  182. int32_t oldP = 0;
  183. while (oldP < description.length()) {
  184. int32_t p = description.indexOf(gSemicolon, oldP);
  185. if (p == -1) {
  186. p = description.length();
  187. }
  188. currentDescription.setTo(description, oldP, p - oldP);
  189. NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
  190. oldP = p + 1;
  191. }
  192. // for rules that didn't specify a base value, their base values
  193. // were initialized to 0. Make another pass through the list and
  194. // set all those rules' base values. We also remove any special
  195. // rules from the list and put them into their own member variables
  196. int64_t defaultBaseValue = 0;
  197. // (this isn't a for loop because we might be deleting items from
  198. // the vector-- we want to make sure we only increment i when
  199. // we _didn't_ delete anything from the vector)
  200. int32_t rulesSize = rules.size();
  201. for (int32_t i = 0; i < rulesSize; i++) {
  202. NFRule* rule = rules[i];
  203. int64_t baseValue = rule->getBaseValue();
  204. if (baseValue == 0) {
  205. // if the rule's base value is 0, fill in a default
  206. // base value (this will be 1 plus the preceding
  207. // rule's base value for regular rule sets, and the
  208. // same as the preceding rule's base value in fraction
  209. // rule sets)
  210. rule->setBaseValue(defaultBaseValue, status);
  211. }
  212. else {
  213. // if it's a regular rule that already knows its base value,
  214. // check to make sure the rules are in order, and update
  215. // the default base value for the next rule
  216. if (baseValue < defaultBaseValue) {
  217. // throw new IllegalArgumentException("Rules are not in order");
  218. status = U_PARSE_ERROR;
  219. return;
  220. }
  221. defaultBaseValue = baseValue;
  222. }
  223. if (!fIsFractionRuleSet) {
  224. ++defaultBaseValue;
  225. }
  226. }
  227. }
  228. /**
  229. * Set one of the non-numerical rules.
  230. * @param rule The rule to set.
  231. */
  232. void NFRuleSet::setNonNumericalRule(NFRule *rule) {
  233. int64_t baseValue = rule->getBaseValue();
  234. if (baseValue == NFRule::kNegativeNumberRule) {
  235. delete nonNumericalRules[NEGATIVE_RULE_INDEX];
  236. nonNumericalRules[NEGATIVE_RULE_INDEX] = rule;
  237. }
  238. else if (baseValue == NFRule::kImproperFractionRule) {
  239. setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, true);
  240. }
  241. else if (baseValue == NFRule::kProperFractionRule) {
  242. setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, true);
  243. }
  244. else if (baseValue == NFRule::kDefaultRule) {
  245. setBestFractionRule(DEFAULT_RULE_INDEX, rule, true);
  246. }
  247. else if (baseValue == NFRule::kInfinityRule) {
  248. delete nonNumericalRules[INFINITY_RULE_INDEX];
  249. nonNumericalRules[INFINITY_RULE_INDEX] = rule;
  250. }
  251. else if (baseValue == NFRule::kNaNRule) {
  252. delete nonNumericalRules[NAN_RULE_INDEX];
  253. nonNumericalRules[NAN_RULE_INDEX] = rule;
  254. }
  255. }
  256. /**
  257. * Determine the best fraction rule to use. Rules matching the decimal point from
  258. * DecimalFormatSymbols become the main set of rules to use.
  259. * @param originalIndex The index into nonNumericalRules
  260. * @param newRule The new rule to consider
  261. * @param rememberRule Should the new rule be added to fractionRules.
  262. */
  263. void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) {
  264. if (rememberRule) {
  265. fractionRules.add(newRule);
  266. }
  267. NFRule *bestResult = nonNumericalRules[originalIndex];
  268. if (bestResult == nullptr) {
  269. nonNumericalRules[originalIndex] = newRule;
  270. }
  271. else {
  272. // We have more than one. Which one is better?
  273. const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols();
  274. if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0)
  275. == newRule->getDecimalPoint())
  276. {
  277. nonNumericalRules[originalIndex] = newRule;
  278. }
  279. // else leave it alone
  280. }
  281. }
  282. NFRuleSet::~NFRuleSet()
  283. {
  284. for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
  285. if (i != IMPROPER_FRACTION_RULE_INDEX
  286. && i != PROPER_FRACTION_RULE_INDEX
  287. && i != DEFAULT_RULE_INDEX)
  288. {
  289. delete nonNumericalRules[i];
  290. }
  291. // else it will be deleted via NFRuleList fractionRules
  292. }
  293. }
  294. static UBool
  295. util_equalRules(const NFRule* rule1, const NFRule* rule2)
  296. {
  297. if (rule1) {
  298. if (rule2) {
  299. return *rule1 == *rule2;
  300. }
  301. } else if (!rule2) {
  302. return true;
  303. }
  304. return false;
  305. }
  306. bool
  307. NFRuleSet::operator==(const NFRuleSet& rhs) const
  308. {
  309. if (rules.size() == rhs.rules.size() &&
  310. fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
  311. name == rhs.name) {
  312. // ...then compare the non-numerical rule lists...
  313. for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
  314. if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) {
  315. return false;
  316. }
  317. }
  318. // ...then compare the rule lists...
  319. for (uint32_t i = 0; i < rules.size(); ++i) {
  320. if (*rules[i] != *rhs.rules[i]) {
  321. return false;
  322. }
  323. }
  324. return true;
  325. }
  326. return false;
  327. }
  328. void
  329. NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) {
  330. for (uint32_t i = 0; i < rules.size(); ++i) {
  331. rules[i]->setDecimalFormatSymbols(newSymbols, status);
  332. }
  333. // Switch the fraction rules to mirror the DecimalFormatSymbols.
  334. for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= DEFAULT_RULE_INDEX; nonNumericalIdx++) {
  335. if (nonNumericalRules[nonNumericalIdx]) {
  336. for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
  337. NFRule *fractionRule = fractionRules[fIdx];
  338. if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) {
  339. setBestFractionRule(nonNumericalIdx, fractionRule, false);
  340. }
  341. }
  342. }
  343. }
  344. for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) {
  345. NFRule *rule = nonNumericalRules[nnrIdx];
  346. if (rule) {
  347. rule->setDecimalFormatSymbols(newSymbols, status);
  348. }
  349. }
  350. }
  351. #define RECURSION_LIMIT 64
  352. void
  353. NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
  354. {
  355. if (recursionCount >= RECURSION_LIMIT) {
  356. // stop recursion
  357. status = U_INVALID_STATE_ERROR;
  358. return;
  359. }
  360. const NFRule *rule = findNormalRule(number);
  361. if (rule) { // else error, but can't report it
  362. rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
  363. }
  364. }
  365. void
  366. NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
  367. {
  368. if (recursionCount >= RECURSION_LIMIT) {
  369. // stop recursion
  370. status = U_INVALID_STATE_ERROR;
  371. return;
  372. }
  373. const NFRule *rule = findDoubleRule(number);
  374. if (rule) { // else error, but can't report it
  375. rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
  376. }
  377. }
  378. const NFRule*
  379. NFRuleSet::findDoubleRule(double number) const
  380. {
  381. // if this is a fraction rule set, use findFractionRuleSetRule()
  382. if (isFractionRuleSet()) {
  383. return findFractionRuleSetRule(number);
  384. }
  385. if (uprv_isNaN(number)) {
  386. const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX];
  387. if (!rule) {
  388. rule = owner->getDefaultNaNRule();
  389. }
  390. return rule;
  391. }
  392. // if the number is negative, return the negative number rule
  393. // (if there isn't a negative-number rule, we pretend it's a
  394. // positive number)
  395. if (number < 0) {
  396. if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
  397. return nonNumericalRules[NEGATIVE_RULE_INDEX];
  398. } else {
  399. number = -number;
  400. }
  401. }
  402. if (uprv_isInfinite(number)) {
  403. const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX];
  404. if (!rule) {
  405. rule = owner->getDefaultInfinityRule();
  406. }
  407. return rule;
  408. }
  409. // if the number isn't an integer, we use one of the fraction rules...
  410. if (number != uprv_floor(number)) {
  411. // if the number is between 0 and 1, return the proper
  412. // fraction rule
  413. if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) {
  414. return nonNumericalRules[PROPER_FRACTION_RULE_INDEX];
  415. }
  416. // otherwise, return the improper fraction rule
  417. else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) {
  418. return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX];
  419. }
  420. }
  421. // if there's a default rule, use it to format the number
  422. if (nonNumericalRules[DEFAULT_RULE_INDEX]) {
  423. return nonNumericalRules[DEFAULT_RULE_INDEX];
  424. }
  425. // and if we haven't yet returned a rule, use findNormalRule()
  426. // to find the applicable rule
  427. int64_t r = util64_fromDouble(number + 0.5);
  428. return findNormalRule(r);
  429. }
  430. const NFRule *
  431. NFRuleSet::findNormalRule(int64_t number) const
  432. {
  433. // if this is a fraction rule set, use findFractionRuleSetRule()
  434. // to find the rule (we should only go into this clause if the
  435. // value is 0)
  436. if (fIsFractionRuleSet) {
  437. return findFractionRuleSetRule((double)number);
  438. }
  439. // if the number is negative, return the negative-number rule
  440. // (if there isn't one, pretend the number is positive)
  441. if (number < 0) {
  442. if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
  443. return nonNumericalRules[NEGATIVE_RULE_INDEX];
  444. } else {
  445. number = -number;
  446. }
  447. }
  448. // we have to repeat the preceding two checks, even though we
  449. // do them in findRule(), because the version of format() that
  450. // takes a long bypasses findRule() and goes straight to this
  451. // function. This function does skip the fraction rules since
  452. // we know the value is an integer (it also skips the default
  453. // rule, since it's considered a fraction rule. Skipping the
  454. // default rule in this function is also how we avoid infinite
  455. // recursion)
  456. // {dlf} unfortunately this fails if there are no rules except
  457. // special rules. If there are no rules, use the default rule.
  458. // binary-search the rule list for the applicable rule
  459. // (a rule is used for all values from its base value to
  460. // the next rule's base value)
  461. int32_t hi = rules.size();
  462. if (hi > 0) {
  463. int32_t lo = 0;
  464. while (lo < hi) {
  465. int32_t mid = (lo + hi) / 2;
  466. if (rules[mid]->getBaseValue() == number) {
  467. return rules[mid];
  468. }
  469. else if (rules[mid]->getBaseValue() > number) {
  470. hi = mid;
  471. }
  472. else {
  473. lo = mid + 1;
  474. }
  475. }
  476. if (hi == 0) { // bad rule set, minimum base > 0
  477. return nullptr; // want to throw exception here
  478. }
  479. NFRule *result = rules[hi - 1];
  480. // use shouldRollBack() to see whether we need to invoke the
  481. // rollback rule (see shouldRollBack()'s documentation for
  482. // an explanation of the rollback rule). If we do, roll back
  483. // one rule and return that one instead of the one we'd normally
  484. // return
  485. if (result->shouldRollBack(number)) {
  486. if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
  487. return nullptr;
  488. }
  489. result = rules[hi - 2];
  490. }
  491. return result;
  492. }
  493. // else use the default rule
  494. return nonNumericalRules[DEFAULT_RULE_INDEX];
  495. }
  496. /**
  497. * If this rule is a fraction rule set, this function is used by
  498. * findRule() to select the most appropriate rule for formatting
  499. * the number. Basically, the base value of each rule in the rule
  500. * set is treated as the denominator of a fraction. Whichever
  501. * denominator can produce the fraction closest in value to the
  502. * number passed in is the result. If there's a tie, the earlier
  503. * one in the list wins. (If there are two rules in a row with the
  504. * same base value, the first one is used when the numerator of the
  505. * fraction would be 1, and the second rule is used the rest of the
  506. * time.
  507. * @param number The number being formatted (which will always be
  508. * a number between 0 and 1)
  509. * @return The rule to use to format this number
  510. */
  511. const NFRule*
  512. NFRuleSet::findFractionRuleSetRule(double number) const
  513. {
  514. // the obvious way to do this (multiply the value being formatted
  515. // by each rule's base value until you get an integral result)
  516. // doesn't work because of rounding error. This method is more
  517. // accurate
  518. // find the least common multiple of the rules' base values
  519. // and multiply this by the number being formatted. This is
  520. // all the precision we need, and we can do all of the rest
  521. // of the math using integer arithmetic
  522. int64_t leastCommonMultiple = rules[0]->getBaseValue();
  523. int64_t numerator;
  524. {
  525. for (uint32_t i = 1; i < rules.size(); ++i) {
  526. leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
  527. }
  528. numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
  529. }
  530. // for each rule, do the following...
  531. int64_t tempDifference;
  532. int64_t difference = util64_fromDouble(uprv_maxMantissa());
  533. int32_t winner = 0;
  534. for (uint32_t i = 0; i < rules.size(); ++i) {
  535. // "numerator" is the numerator of the fraction if the
  536. // denominator is the LCD. The numerator if the rule's
  537. // base value is the denominator is "numerator" times the
  538. // base value divided bythe LCD. Here we check to see if
  539. // that's an integer, and if not, how close it is to being
  540. // an integer.
  541. tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
  542. // normalize the result of the above calculation: we want
  543. // the numerator's distance from the CLOSEST multiple
  544. // of the LCD
  545. if (leastCommonMultiple - tempDifference < tempDifference) {
  546. tempDifference = leastCommonMultiple - tempDifference;
  547. }
  548. // if this is as close as we've come, keep track of how close
  549. // that is, and the line number of the rule that did it. If
  550. // we've scored a direct hit, we don't have to look at any more
  551. // rules
  552. if (tempDifference < difference) {
  553. difference = tempDifference;
  554. winner = i;
  555. if (difference == 0) {
  556. break;
  557. }
  558. }
  559. }
  560. // if we have two successive rules that both have the winning base
  561. // value, then the first one (the one we found above) is used if
  562. // the numerator of the fraction is 1 and the second one is used if
  563. // the numerator of the fraction is anything else (this lets us
  564. // do things like "one third"/"two thirds" without having to define
  565. // a whole bunch of extra rule sets)
  566. if ((unsigned)(winner + 1) < rules.size() &&
  567. rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
  568. double n = ((double)rules[winner]->getBaseValue()) * number;
  569. if (n < 0.5 || n >= 2) {
  570. ++winner;
  571. }
  572. }
  573. // finally, return the winning rule
  574. return rules[winner];
  575. }
  576. /**
  577. * Parses a string. Matches the string to be parsed against each
  578. * of its rules (with a base value less than upperBound) and returns
  579. * the value produced by the rule that matched the most characters
  580. * in the source string.
  581. * @param text The string to parse
  582. * @param parsePosition The initial position is ignored and assumed
  583. * to be 0. On exit, this object has been updated to point to the
  584. * first character position this rule set didn't consume.
  585. * @param upperBound Limits the rules that can be allowed to match.
  586. * Only rules whose base values are strictly less than upperBound
  587. * are considered.
  588. * @return The numerical result of parsing this string. This will
  589. * be the matching rule's base value, composed appropriately with
  590. * the results of matching any of its substitutions. The object
  591. * will be an instance of Long if it's an integral value; otherwise,
  592. * it will be an instance of Double. This function always returns
  593. * a valid object: If nothing matched the input string at all,
  594. * this function returns new Long(0), and the parse position is
  595. * left unchanged.
  596. */
  597. #ifdef RBNF_DEBUG
  598. #include <stdio.h>
  599. static void dumpUS(FILE* f, const UnicodeString& us) {
  600. int len = us.length();
  601. char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
  602. if (buf != nullptr) {
  603. us.extract(0, len, buf);
  604. buf[len] = 0;
  605. fprintf(f, "%s", buf);
  606. uprv_free(buf); //delete[] buf;
  607. }
  608. }
  609. #endif
  610. UBool
  611. NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, Formattable& result) const
  612. {
  613. // try matching each rule in the rule set against the text being
  614. // parsed. Whichever one matches the most characters is the one
  615. // that determines the value we return.
  616. result.setLong(0);
  617. // dump out if there's no text to parse
  618. if (text.length() == 0) {
  619. return 0;
  620. }
  621. ParsePosition highWaterMark;
  622. ParsePosition workingPos = pos;
  623. #ifdef RBNF_DEBUG
  624. fprintf(stderr, "<nfrs> %x '", this);
  625. dumpUS(stderr, name);
  626. fprintf(stderr, "' text '");
  627. dumpUS(stderr, text);
  628. fprintf(stderr, "'\n");
  629. fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != 0);
  630. #endif
  631. // Try each of the negative rules, fraction rules, infinity rules and NaN rules
  632. for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
  633. if (nonNumericalRules[i] && ((nonNumericalExecutedRuleMask >> i) & 1) == 0) {
  634. // Mark this rule as being executed so that we don't try to execute it again.
  635. nonNumericalExecutedRuleMask |= 1 << i;
  636. Formattable tempResult;
  637. UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, tempResult);
  638. if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
  639. result = tempResult;
  640. highWaterMark = workingPos;
  641. }
  642. workingPos = pos;
  643. }
  644. }
  645. #ifdef RBNF_DEBUG
  646. fprintf(stderr, "<nfrs> continue other with text '");
  647. dumpUS(stderr, text);
  648. fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
  649. #endif
  650. // finally, go through the regular rules one at a time. We start
  651. // at the end of the list because we want to try matching the most
  652. // sigificant rule first (this helps ensure that we parse
  653. // "five thousand three hundred six" as
  654. // "(five thousand) (three hundred) (six)" rather than
  655. // "((five thousand three) hundred) (six)"). Skip rules whose
  656. // base values are higher than the upper bound (again, this helps
  657. // limit ambiguity by making sure the rules that match a rule's
  658. // are less significant than the rule containing the substitutions)/
  659. {
  660. int64_t ub = util64_fromDouble(upperBound);
  661. #ifdef RBNF_DEBUG
  662. {
  663. char ubstr[64];
  664. util64_toa(ub, ubstr, 64);
  665. char ubstrhex[64];
  666. util64_toa(ub, ubstrhex, 64, 16);
  667. fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
  668. }
  669. #endif
  670. for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
  671. if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
  672. continue;
  673. }
  674. Formattable tempResult;
  675. UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, tempResult);
  676. if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
  677. result = tempResult;
  678. highWaterMark = workingPos;
  679. }
  680. workingPos = pos;
  681. }
  682. }
  683. #ifdef RBNF_DEBUG
  684. fprintf(stderr, "<nfrs> exit\n");
  685. #endif
  686. // finally, update the parse position we were passed to point to the
  687. // first character we didn't use, and return the result that
  688. // corresponds to that string of characters
  689. pos = highWaterMark;
  690. return 1;
  691. }
  692. void
  693. NFRuleSet::appendRules(UnicodeString& result) const
  694. {
  695. uint32_t i;
  696. // the rule set name goes first...
  697. result.append(name);
  698. result.append(gColon);
  699. result.append(gLineFeed);
  700. // followed by the regular rules...
  701. for (i = 0; i < rules.size(); i++) {
  702. rules[i]->_appendRuleText(result);
  703. result.append(gLineFeed);
  704. }
  705. // followed by the special rules (if they exist)
  706. for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
  707. NFRule *rule = nonNumericalRules[i];
  708. if (nonNumericalRules[i]) {
  709. if (rule->getBaseValue() == NFRule::kImproperFractionRule
  710. || rule->getBaseValue() == NFRule::kProperFractionRule
  711. || rule->getBaseValue() == NFRule::kDefaultRule)
  712. {
  713. for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
  714. NFRule *fractionRule = fractionRules[fIdx];
  715. if (fractionRule->getBaseValue() == rule->getBaseValue()) {
  716. fractionRule->_appendRuleText(result);
  717. result.append(gLineFeed);
  718. }
  719. }
  720. }
  721. else {
  722. rule->_appendRuleText(result);
  723. result.append(gLineFeed);
  724. }
  725. }
  726. }
  727. }
  728. // utility functions
  729. int64_t util64_fromDouble(double d) {
  730. int64_t result = 0;
  731. if (!uprv_isNaN(d)) {
  732. double mant = uprv_maxMantissa();
  733. if (d < -mant) {
  734. d = -mant;
  735. } else if (d > mant) {
  736. d = mant;
  737. }
  738. UBool neg = d < 0;
  739. if (neg) {
  740. d = -d;
  741. }
  742. result = (int64_t)uprv_floor(d);
  743. if (neg) {
  744. result = -result;
  745. }
  746. }
  747. return result;
  748. }
  749. uint64_t util64_pow(uint32_t base, uint16_t exponent) {
  750. if (base == 0) {
  751. return 0;
  752. }
  753. uint64_t result = 1;
  754. uint64_t pow = base;
  755. while (true) {
  756. if ((exponent & 1) == 1) {
  757. result *= pow;
  758. }
  759. exponent >>= 1;
  760. if (exponent == 0) {
  761. break;
  762. }
  763. pow *= pow;
  764. }
  765. return result;
  766. }
  767. static const uint8_t asciiDigits[] = {
  768. 0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
  769. 0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
  770. 0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
  771. 0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
  772. 0x77u, 0x78u, 0x79u, 0x7au,
  773. };
  774. static const char16_t kUMinus = (char16_t)0x002d;
  775. #ifdef RBNF_DEBUG
  776. static const char kMinus = '-';
  777. static const uint8_t digitInfo[] = {
  778. 0, 0, 0, 0, 0, 0, 0, 0,
  779. 0, 0, 0, 0, 0, 0, 0, 0,
  780. 0, 0, 0, 0, 0, 0, 0, 0,
  781. 0, 0, 0, 0, 0, 0, 0, 0,
  782. 0, 0, 0, 0, 0, 0, 0, 0,
  783. 0, 0, 0, 0, 0, 0, 0, 0,
  784. 0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
  785. 0x88u, 0x89u, 0, 0, 0, 0, 0, 0,
  786. 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
  787. 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
  788. 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
  789. 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0,
  790. 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
  791. 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
  792. 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
  793. 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0,
  794. };
  795. int64_t util64_atoi(const char* str, uint32_t radix)
  796. {
  797. if (radix > 36) {
  798. radix = 36;
  799. } else if (radix < 2) {
  800. radix = 2;
  801. }
  802. int64_t lradix = radix;
  803. int neg = 0;
  804. if (*str == kMinus) {
  805. ++str;
  806. neg = 1;
  807. }
  808. int64_t result = 0;
  809. uint8_t b;
  810. while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
  811. result *= lradix;
  812. result += (int32_t)b;
  813. }
  814. if (neg) {
  815. result = -result;
  816. }
  817. return result;
  818. }
  819. int64_t util64_utoi(const char16_t* str, uint32_t radix)
  820. {
  821. if (radix > 36) {
  822. radix = 36;
  823. } else if (radix < 2) {
  824. radix = 2;
  825. }
  826. int64_t lradix = radix;
  827. int neg = 0;
  828. if (*str == kUMinus) {
  829. ++str;
  830. neg = 1;
  831. }
  832. int64_t result = 0;
  833. char16_t c;
  834. uint8_t b;
  835. while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
  836. result *= lradix;
  837. result += (int32_t)b;
  838. }
  839. if (neg) {
  840. result = -result;
  841. }
  842. return result;
  843. }
  844. uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
  845. {
  846. if (radix > 36) {
  847. radix = 36;
  848. } else if (radix < 2) {
  849. radix = 2;
  850. }
  851. int64_t base = radix;
  852. char* p = buf;
  853. if (len && (w < 0) && (radix == 10) && !raw) {
  854. w = -w;
  855. *p++ = kMinus;
  856. --len;
  857. } else if (len && (w == 0)) {
  858. *p++ = (char)raw ? 0 : asciiDigits[0];
  859. --len;
  860. }
  861. while (len && w != 0) {
  862. int64_t n = w / base;
  863. int64_t m = n * base;
  864. int32_t d = (int32_t)(w-m);
  865. *p++ = raw ? (char)d : asciiDigits[d];
  866. w = n;
  867. --len;
  868. }
  869. if (len) {
  870. *p = 0; // null terminate if room for caller convenience
  871. }
  872. len = p - buf;
  873. if (*buf == kMinus) {
  874. ++buf;
  875. }
  876. while (--p > buf) {
  877. char c = *p;
  878. *p = *buf;
  879. *buf = c;
  880. ++buf;
  881. }
  882. return len;
  883. }
  884. #endif
  885. uint32_t util64_tou(int64_t w, char16_t* buf, uint32_t len, uint32_t radix, UBool raw)
  886. {
  887. if (radix > 36) {
  888. radix = 36;
  889. } else if (radix < 2) {
  890. radix = 2;
  891. }
  892. int64_t base = radix;
  893. char16_t* p = buf;
  894. if (len && (w < 0) && (radix == 10) && !raw) {
  895. w = -w;
  896. *p++ = kUMinus;
  897. --len;
  898. } else if (len && (w == 0)) {
  899. *p++ = (char16_t)raw ? 0 : asciiDigits[0];
  900. --len;
  901. }
  902. while (len && (w != 0)) {
  903. int64_t n = w / base;
  904. int64_t m = n * base;
  905. int32_t d = (int32_t)(w-m);
  906. *p++ = (char16_t)(raw ? d : asciiDigits[d]);
  907. w = n;
  908. --len;
  909. }
  910. if (len) {
  911. *p = 0; // null terminate if room for caller convenience
  912. }
  913. len = (uint32_t)(p - buf);
  914. if (*buf == kUMinus) {
  915. ++buf;
  916. }
  917. while (--p > buf) {
  918. char16_t c = *p;
  919. *p = *buf;
  920. *buf = c;
  921. ++buf;
  922. }
  923. return len;
  924. }
  925. U_NAMESPACE_END
  926. /* U_HAVE_RBNF */
  927. #endif