localematcher.cpp 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834
  1. // © 2019 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. // localematcher.cpp
  4. // created: 2019may08 Markus W. Scherer
  5. #include <optional>
  6. #include "unicode/utypes.h"
  7. #include "unicode/localebuilder.h"
  8. #include "unicode/localematcher.h"
  9. #include "unicode/locid.h"
  10. #include "unicode/stringpiece.h"
  11. #include "unicode/uloc.h"
  12. #include "unicode/uobject.h"
  13. #include "cstring.h"
  14. #include "localeprioritylist.h"
  15. #include "loclikelysubtags.h"
  16. #include "locdistance.h"
  17. #include "lsr.h"
  18. #include "uassert.h"
  19. #include "uhash.h"
  20. #include "ustr_imp.h"
  21. #include "uvector.h"
// Shorthand for a freshly constructed LSR with language "und", empty second and third
// fields, and the LSR::EXPLICIT_LSR flag. getMaximalLsrOrUnd() returns this value
// for failed error codes, bogus locales, and locales with an empty name.
#define UND_LSR LSR("und", "", "", LSR::EXPLICIT_LSR)
  23. /**
  24. * Indicator for the lifetime of desired-locale objects passed into the LocaleMatcher.
  25. *
  26. * @draft ICU 65
  27. */
  28. enum ULocMatchLifetime {
  29. /**
  30. * Locale objects are temporary.
  31. * The matcher will make a copy of a locale that will be used beyond one function call.
  32. *
  33. * @draft ICU 65
  34. */
  35. ULOCMATCH_TEMPORARY_LOCALES,
  36. /**
  37. * Locale objects are stored at least as long as the matcher is used.
  38. * The matcher will keep only a pointer to a locale that will be used beyond one function call,
  39. * avoiding a copy.
  40. *
  41. * @draft ICU 65
  42. */
  43. ULOCMATCH_STORED_LOCALES // TODO: permanent? cached? clone?
  44. };
  45. #ifndef U_IN_DOXYGEN
  46. typedef enum ULocMatchLifetime ULocMatchLifetime;
  47. #endif
  48. U_NAMESPACE_BEGIN
  49. LocaleMatcher::Result::Result(LocaleMatcher::Result &&src) noexcept :
  50. desiredLocale(src.desiredLocale),
  51. supportedLocale(src.supportedLocale),
  52. desiredIndex(src.desiredIndex),
  53. supportedIndex(src.supportedIndex),
  54. desiredIsOwned(src.desiredIsOwned) {
  55. if (desiredIsOwned) {
  56. src.desiredLocale = nullptr;
  57. src.desiredIndex = -1;
  58. src.desiredIsOwned = false;
  59. }
  60. }
  61. LocaleMatcher::Result::~Result() {
  62. if (desiredIsOwned) {
  63. delete desiredLocale;
  64. }
  65. }
  66. LocaleMatcher::Result &LocaleMatcher::Result::operator=(LocaleMatcher::Result &&src) noexcept {
  67. this->~Result();
  68. desiredLocale = src.desiredLocale;
  69. supportedLocale = src.supportedLocale;
  70. desiredIndex = src.desiredIndex;
  71. supportedIndex = src.supportedIndex;
  72. desiredIsOwned = src.desiredIsOwned;
  73. if (desiredIsOwned) {
  74. src.desiredLocale = nullptr;
  75. src.desiredIndex = -1;
  76. src.desiredIsOwned = false;
  77. }
  78. return *this;
  79. }
  80. Locale LocaleMatcher::Result::makeResolvedLocale(UErrorCode &errorCode) const {
  81. if (U_FAILURE(errorCode) || supportedLocale == nullptr) {
  82. return Locale::getRoot();
  83. }
  84. const Locale *bestDesired = getDesiredLocale();
  85. if (bestDesired == nullptr || *supportedLocale == *bestDesired) {
  86. return *supportedLocale;
  87. }
  88. LocaleBuilder b;
  89. b.setLocale(*supportedLocale);
  90. // Copy the region from bestDesired, if there is one.
  91. const char *region = bestDesired->getCountry();
  92. if (*region != 0) {
  93. b.setRegion(region);
  94. }
  95. // Copy the variants from bestDesired, if there are any.
  96. // Note that this will override any supportedLocale variants.
  97. // For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster).
  98. const char *variants = bestDesired->getVariant();
  99. if (*variants != 0) {
  100. b.setVariant(variants);
  101. }
  102. // Copy the extensions from bestDesired, if there are any.
  103. // C++ note: The following note, copied from Java, may not be true,
  104. // as long as C++ copies by legacy ICU keyword, not by extension singleton.
  105. // Note that this will override any supportedLocale extensions.
  106. // For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
  107. // (replacing calendar).
  108. b.copyExtensionsFrom(*bestDesired, errorCode);
  109. return b.build(errorCode);
  110. }
  111. LocaleMatcher::Builder::Builder(LocaleMatcher::Builder &&src) noexcept :
  112. errorCode_(src.errorCode_),
  113. supportedLocales_(src.supportedLocales_),
  114. thresholdDistance_(src.thresholdDistance_),
  115. demotion_(src.demotion_),
  116. defaultLocale_(src.defaultLocale_),
  117. withDefault_(src.withDefault_),
  118. favor_(src.favor_),
  119. direction_(src.direction_) {
  120. src.supportedLocales_ = nullptr;
  121. src.defaultLocale_ = nullptr;
  122. }
  123. LocaleMatcher::Builder::~Builder() {
  124. delete supportedLocales_;
  125. delete defaultLocale_;
  126. delete maxDistanceDesired_;
  127. delete maxDistanceSupported_;
  128. }
  129. LocaleMatcher::Builder &LocaleMatcher::Builder::operator=(LocaleMatcher::Builder &&src) noexcept {
  130. this->~Builder();
  131. errorCode_ = src.errorCode_;
  132. supportedLocales_ = src.supportedLocales_;
  133. thresholdDistance_ = src.thresholdDistance_;
  134. demotion_ = src.demotion_;
  135. defaultLocale_ = src.defaultLocale_;
  136. withDefault_ = src.withDefault_,
  137. favor_ = src.favor_;
  138. direction_ = src.direction_;
  139. src.supportedLocales_ = nullptr;
  140. src.defaultLocale_ = nullptr;
  141. return *this;
  142. }
  143. void LocaleMatcher::Builder::clearSupportedLocales() {
  144. if (supportedLocales_ != nullptr) {
  145. supportedLocales_->removeAllElements();
  146. }
  147. }
  148. bool LocaleMatcher::Builder::ensureSupportedLocaleVector() {
  149. if (U_FAILURE(errorCode_)) { return false; }
  150. if (supportedLocales_ != nullptr) { return true; }
  151. LocalPointer<UVector> lpSupportedLocales(new UVector(uprv_deleteUObject, nullptr, errorCode_), errorCode_);
  152. if (U_FAILURE(errorCode_)) { return false; }
  153. supportedLocales_ = lpSupportedLocales.orphan();
  154. return true;
  155. }
  156. LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListString(
  157. StringPiece locales) {
  158. LocalePriorityList list(locales, errorCode_);
  159. if (U_FAILURE(errorCode_)) { return *this; }
  160. clearSupportedLocales();
  161. if (!ensureSupportedLocaleVector()) { return *this; }
  162. int32_t length = list.getLengthIncludingRemoved();
  163. for (int32_t i = 0; i < length; ++i) {
  164. Locale *locale = list.orphanLocaleAt(i);
  165. if (locale == nullptr) { continue; }
  166. supportedLocales_->adoptElement(locale, errorCode_);
  167. if (U_FAILURE(errorCode_)) {
  168. break;
  169. }
  170. }
  171. return *this;
  172. }
  173. LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocales(Locale::Iterator &locales) {
  174. if (ensureSupportedLocaleVector()) {
  175. clearSupportedLocales();
  176. while (locales.hasNext() && U_SUCCESS(errorCode_)) {
  177. const Locale &locale = locales.next();
  178. LocalPointer<Locale> clone (locale.clone(), errorCode_);
  179. supportedLocales_->adoptElement(clone.orphan(), errorCode_);
  180. }
  181. }
  182. return *this;
  183. }
  184. LocaleMatcher::Builder &LocaleMatcher::Builder::addSupportedLocale(const Locale &locale) {
  185. if (ensureSupportedLocaleVector()) {
  186. LocalPointer<Locale> clone(locale.clone(), errorCode_);
  187. supportedLocales_->adoptElement(clone.orphan(), errorCode_);
  188. }
  189. return *this;
  190. }
  191. LocaleMatcher::Builder &LocaleMatcher::Builder::setNoDefaultLocale() {
  192. if (U_FAILURE(errorCode_)) { return *this; }
  193. delete defaultLocale_;
  194. defaultLocale_ = nullptr;
  195. withDefault_ = false;
  196. return *this;
  197. }
  198. LocaleMatcher::Builder &LocaleMatcher::Builder::setDefaultLocale(const Locale *defaultLocale) {
  199. if (U_FAILURE(errorCode_)) { return *this; }
  200. Locale *clone = nullptr;
  201. if (defaultLocale != nullptr) {
  202. clone = defaultLocale->clone();
  203. if (clone == nullptr) {
  204. errorCode_ = U_MEMORY_ALLOCATION_ERROR;
  205. return *this;
  206. }
  207. }
  208. delete defaultLocale_;
  209. defaultLocale_ = clone;
  210. withDefault_ = true;
  211. return *this;
  212. }
  213. LocaleMatcher::Builder &LocaleMatcher::Builder::setFavorSubtag(ULocMatchFavorSubtag subtag) {
  214. if (U_FAILURE(errorCode_)) { return *this; }
  215. favor_ = subtag;
  216. return *this;
  217. }
  218. LocaleMatcher::Builder &LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULocMatchDemotion demotion) {
  219. if (U_FAILURE(errorCode_)) { return *this; }
  220. demotion_ = demotion;
  221. return *this;
  222. }
  223. LocaleMatcher::Builder &LocaleMatcher::Builder::setMaxDistance(const Locale &desired,
  224. const Locale &supported) {
  225. if (U_FAILURE(errorCode_)) { return *this; }
  226. Locale *desiredClone = desired.clone();
  227. Locale *supportedClone = supported.clone();
  228. if (desiredClone == nullptr || supportedClone == nullptr) {
  229. delete desiredClone; // in case only one could not be allocated
  230. delete supportedClone;
  231. errorCode_ = U_MEMORY_ALLOCATION_ERROR;
  232. return *this;
  233. }
  234. delete maxDistanceDesired_;
  235. delete maxDistanceSupported_;
  236. maxDistanceDesired_ = desiredClone;
  237. maxDistanceSupported_ = supportedClone;
  238. return *this;
  239. }
  240. #if 0
  241. /**
  242. * <i>Internal only!</i>
  243. *
  244. * @param thresholdDistance the thresholdDistance to set, with -1 = default
  245. * @return this Builder object
  246. * @internal
  247. * @deprecated This API is ICU internal only.
  248. */
  249. @Deprecated
  250. LocaleMatcher::Builder &LocaleMatcher::Builder::internalSetThresholdDistance(int32_t thresholdDistance) {
  251. if (U_FAILURE(errorCode_)) { return *this; }
  252. if (thresholdDistance > 100) {
  253. thresholdDistance = 100;
  254. }
  255. thresholdDistance_ = thresholdDistance;
  256. return *this;
  257. }
  258. #endif
  259. UBool LocaleMatcher::Builder::copyErrorTo(UErrorCode &outErrorCode) const {
  260. if (U_FAILURE(outErrorCode)) { return true; }
  261. if (U_SUCCESS(errorCode_)) { return false; }
  262. outErrorCode = errorCode_;
  263. return true;
  264. }
  265. LocaleMatcher LocaleMatcher::Builder::build(UErrorCode &errorCode) const {
  266. if (U_SUCCESS(errorCode) && U_FAILURE(errorCode_)) {
  267. errorCode = errorCode_;
  268. }
  269. return LocaleMatcher(*this, errorCode);
  270. }
  271. namespace {
  272. LSR getMaximalLsrOrUnd(const LikelySubtags &likelySubtags, const Locale &locale,
  273. UErrorCode &errorCode) {
  274. if (U_FAILURE(errorCode) || locale.isBogus() || *locale.getName() == 0 /* "und" */) {
  275. return UND_LSR;
  276. } else {
  277. return likelySubtags.makeMaximizedLsrFrom(locale, false, errorCode);
  278. }
  279. }
  280. int32_t hashLSR(const UHashTok token) {
  281. const LSR *lsr = static_cast<const LSR *>(token.pointer);
  282. return lsr->hashCode;
  283. }
  284. UBool compareLSRs(const UHashTok t1, const UHashTok t2) {
  285. const LSR *lsr1 = static_cast<const LSR *>(t1.pointer);
  286. const LSR *lsr2 = static_cast<const LSR *>(t2.pointer);
  287. return *lsr1 == *lsr2;
  288. }
  289. } // namespace
  290. int32_t LocaleMatcher::putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength,
  291. UErrorCode &errorCode) {
  292. if (U_FAILURE(errorCode)) { return suppLength; }
  293. if (!uhash_containsKey(supportedLsrToIndex, &lsr)) {
  294. uhash_putiAllowZero(supportedLsrToIndex, const_cast<LSR *>(&lsr), i, &errorCode);
  295. if (U_SUCCESS(errorCode)) {
  296. supportedLSRs[suppLength] = &lsr;
  297. supportedIndexes[suppLength++] = i;
  298. }
  299. }
  300. return suppLength;
  301. }
  302. LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
  303. likelySubtags(*LikelySubtags::getSingleton(errorCode)),
  304. localeDistance(*LocaleDistance::getSingleton(errorCode)),
  305. thresholdDistance(builder.thresholdDistance_),
  306. demotionPerDesiredLocale(0),
  307. favorSubtag(builder.favor_),
  308. direction(builder.direction_),
  309. supportedLocales(nullptr), lsrs(nullptr), supportedLocalesLength(0),
  310. supportedLsrToIndex(nullptr),
  311. supportedLSRs(nullptr), supportedIndexes(nullptr), supportedLSRsLength(0),
  312. ownedDefaultLocale(nullptr), defaultLocale(nullptr) {
  313. if (U_FAILURE(errorCode)) { return; }
  314. const Locale *def = builder.defaultLocale_;
  315. LSR builderDefaultLSR;
  316. const LSR *defLSR = nullptr;
  317. if (def != nullptr) {
  318. ownedDefaultLocale = def->clone();
  319. if (ownedDefaultLocale == nullptr) {
  320. errorCode = U_MEMORY_ALLOCATION_ERROR;
  321. return;
  322. }
  323. def = ownedDefaultLocale;
  324. builderDefaultLSR = getMaximalLsrOrUnd(likelySubtags, *def, errorCode);
  325. if (U_FAILURE(errorCode)) { return; }
  326. defLSR = &builderDefaultLSR;
  327. }
  328. supportedLocalesLength = builder.supportedLocales_ != nullptr ?
  329. builder.supportedLocales_->size() : 0;
  330. if (supportedLocalesLength > 0) {
  331. // Store the supported locales in input order,
  332. // so that when different types are used (e.g., language tag strings)
  333. // we can return those by parallel index.
  334. supportedLocales = static_cast<const Locale **>(
  335. uprv_malloc(supportedLocalesLength * sizeof(const Locale *)));
  336. // Supported LRSs in input order.
  337. // In C++, we store these permanently to simplify ownership management
  338. // in the hash tables. Duplicate LSRs (if any) are unused overhead.
  339. lsrs = new LSR[supportedLocalesLength];
  340. if (supportedLocales == nullptr || lsrs == nullptr) {
  341. errorCode = U_MEMORY_ALLOCATION_ERROR;
  342. return;
  343. }
  344. // If the constructor fails partway, we need null pointers for destructibility.
  345. uprv_memset(supportedLocales, 0, supportedLocalesLength * sizeof(const Locale *));
  346. for (int32_t i = 0; i < supportedLocalesLength; ++i) {
  347. const Locale &locale = *static_cast<Locale *>(builder.supportedLocales_->elementAt(i));
  348. supportedLocales[i] = locale.clone();
  349. if (supportedLocales[i] == nullptr) {
  350. errorCode = U_MEMORY_ALLOCATION_ERROR;
  351. return;
  352. }
  353. const Locale &supportedLocale = *supportedLocales[i];
  354. LSR &lsr = lsrs[i] = getMaximalLsrOrUnd(likelySubtags, supportedLocale, errorCode);
  355. lsr.setHashCode();
  356. if (U_FAILURE(errorCode)) { return; }
  357. }
  358. // We need an unordered map from LSR to first supported locale with that LSR,
  359. // and an ordered list of (LSR, supported index) for
  360. // the supported locales in the following order:
  361. // 1. Default locale, if it is supported.
  362. // 2. Priority locales (aka "paradigm locales") in builder order.
  363. // 3. Remaining locales in builder order.
  364. supportedLsrToIndex = uhash_openSize(hashLSR, compareLSRs, uhash_compareLong,
  365. supportedLocalesLength, &errorCode);
  366. if (U_FAILURE(errorCode)) { return; }
  367. supportedLSRs = static_cast<const LSR **>(
  368. uprv_malloc(supportedLocalesLength * sizeof(const LSR *)));
  369. supportedIndexes = static_cast<int32_t *>(
  370. uprv_malloc(supportedLocalesLength * sizeof(int32_t)));
  371. if (supportedLSRs == nullptr || supportedIndexes == nullptr) {
  372. errorCode = U_MEMORY_ALLOCATION_ERROR;
  373. return;
  374. }
  375. int32_t suppLength = 0;
  376. // Determine insertion order.
  377. // Add locales immediately that are equivalent to the default.
  378. MaybeStackArray<int8_t, 100> order(supportedLocalesLength, errorCode);
  379. if (U_FAILURE(errorCode)) { return; }
  380. int32_t numParadigms = 0;
  381. for (int32_t i = 0; i < supportedLocalesLength; ++i) {
  382. const Locale &locale = *supportedLocales[i];
  383. const LSR &lsr = lsrs[i];
  384. if (defLSR == nullptr && builder.withDefault_) {
  385. // Implicit default locale = first supported locale, if not turned off.
  386. U_ASSERT(i == 0);
  387. def = &locale;
  388. defLSR = &lsr;
  389. order[i] = 1;
  390. suppLength = putIfAbsent(lsr, 0, suppLength, errorCode);
  391. } else if (defLSR != nullptr && lsr.isEquivalentTo(*defLSR)) {
  392. order[i] = 1;
  393. suppLength = putIfAbsent(lsr, i, suppLength, errorCode);
  394. } else if (localeDistance.isParadigmLSR(lsr)) {
  395. order[i] = 2;
  396. ++numParadigms;
  397. } else {
  398. order[i] = 3;
  399. }
  400. if (U_FAILURE(errorCode)) { return; }
  401. }
  402. // Add supported paradigm locales.
  403. int32_t paradigmLimit = suppLength + numParadigms;
  404. for (int32_t i = 0; i < supportedLocalesLength && suppLength < paradigmLimit; ++i) {
  405. if (order[i] == 2) {
  406. suppLength = putIfAbsent(lsrs[i], i, suppLength, errorCode);
  407. }
  408. }
  409. // Add remaining supported locales.
  410. for (int32_t i = 0; i < supportedLocalesLength; ++i) {
  411. if (order[i] == 3) {
  412. suppLength = putIfAbsent(lsrs[i], i, suppLength, errorCode);
  413. }
  414. }
  415. supportedLSRsLength = suppLength;
  416. // If supportedLSRsLength < supportedLocalesLength then
  417. // we waste as many array slots as there are duplicate supported LSRs,
  418. // but the amount of wasted space is small as long as there are few duplicates.
  419. }
  420. defaultLocale = def;
  421. if (builder.demotion_ == ULOCMATCH_DEMOTION_REGION) {
  422. demotionPerDesiredLocale = localeDistance.getDefaultDemotionPerDesiredLocale();
  423. }
  424. if (thresholdDistance >= 0) {
  425. // already copied
  426. } else if (builder.maxDistanceDesired_ != nullptr) {
  427. LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, *builder.maxDistanceSupported_, errorCode);
  428. const LSR *pSuppLSR = &suppLSR;
  429. int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
  430. getMaximalLsrOrUnd(likelySubtags, *builder.maxDistanceDesired_, errorCode),
  431. &pSuppLSR, 1,
  432. LocaleDistance::shiftDistance(100), favorSubtag, direction);
  433. if (U_SUCCESS(errorCode)) {
  434. // +1 for an exclusive threshold from an inclusive max.
  435. thresholdDistance = LocaleDistance::getDistanceFloor(indexAndDistance) + 1;
  436. } else {
  437. thresholdDistance = 0;
  438. }
  439. } else {
  440. thresholdDistance = localeDistance.getDefaultScriptDistance();
  441. }
  442. }
  443. LocaleMatcher::LocaleMatcher(LocaleMatcher &&src) noexcept :
  444. likelySubtags(src.likelySubtags),
  445. localeDistance(src.localeDistance),
  446. thresholdDistance(src.thresholdDistance),
  447. demotionPerDesiredLocale(src.demotionPerDesiredLocale),
  448. favorSubtag(src.favorSubtag),
  449. direction(src.direction),
  450. supportedLocales(src.supportedLocales), lsrs(src.lsrs),
  451. supportedLocalesLength(src.supportedLocalesLength),
  452. supportedLsrToIndex(src.supportedLsrToIndex),
  453. supportedLSRs(src.supportedLSRs),
  454. supportedIndexes(src.supportedIndexes),
  455. supportedLSRsLength(src.supportedLSRsLength),
  456. ownedDefaultLocale(src.ownedDefaultLocale), defaultLocale(src.defaultLocale) {
  457. src.supportedLocales = nullptr;
  458. src.lsrs = nullptr;
  459. src.supportedLocalesLength = 0;
  460. src.supportedLsrToIndex = nullptr;
  461. src.supportedLSRs = nullptr;
  462. src.supportedIndexes = nullptr;
  463. src.supportedLSRsLength = 0;
  464. src.ownedDefaultLocale = nullptr;
  465. src.defaultLocale = nullptr;
  466. }
  467. LocaleMatcher::~LocaleMatcher() {
  468. for (int32_t i = 0; i < supportedLocalesLength; ++i) {
  469. delete supportedLocales[i];
  470. }
  471. uprv_free(supportedLocales);
  472. delete[] lsrs;
  473. uhash_close(supportedLsrToIndex);
  474. uprv_free(supportedLSRs);
  475. uprv_free(supportedIndexes);
  476. delete ownedDefaultLocale;
  477. }
  478. LocaleMatcher &LocaleMatcher::operator=(LocaleMatcher &&src) noexcept {
  479. this->~LocaleMatcher();
  480. thresholdDistance = src.thresholdDistance;
  481. demotionPerDesiredLocale = src.demotionPerDesiredLocale;
  482. favorSubtag = src.favorSubtag;
  483. direction = src.direction;
  484. supportedLocales = src.supportedLocales;
  485. lsrs = src.lsrs;
  486. supportedLocalesLength = src.supportedLocalesLength;
  487. supportedLsrToIndex = src.supportedLsrToIndex;
  488. supportedLSRs = src.supportedLSRs;
  489. supportedIndexes = src.supportedIndexes;
  490. supportedLSRsLength = src.supportedLSRsLength;
  491. ownedDefaultLocale = src.ownedDefaultLocale;
  492. defaultLocale = src.defaultLocale;
  493. src.supportedLocales = nullptr;
  494. src.lsrs = nullptr;
  495. src.supportedLocalesLength = 0;
  496. src.supportedLsrToIndex = nullptr;
  497. src.supportedLSRs = nullptr;
  498. src.supportedIndexes = nullptr;
  499. src.supportedLSRsLength = 0;
  500. src.ownedDefaultLocale = nullptr;
  501. src.defaultLocale = nullptr;
  502. return *this;
  503. }
  504. class LocaleLsrIterator {
  505. public:
  506. LocaleLsrIterator(const LikelySubtags &likelySubtags, Locale::Iterator &locales,
  507. ULocMatchLifetime lifetime) :
  508. likelySubtags(likelySubtags), locales(locales), lifetime(lifetime) {}
  509. ~LocaleLsrIterator() {
  510. if (lifetime == ULOCMATCH_TEMPORARY_LOCALES) {
  511. delete remembered;
  512. }
  513. }
  514. bool hasNext() const {
  515. return locales.hasNext();
  516. }
  517. LSR next(UErrorCode &errorCode) {
  518. current = &locales.next();
  519. return getMaximalLsrOrUnd(likelySubtags, *current, errorCode);
  520. }
  521. void rememberCurrent(int32_t desiredIndex, UErrorCode &errorCode) {
  522. if (U_FAILURE(errorCode)) { return; }
  523. bestDesiredIndex = desiredIndex;
  524. if (lifetime == ULOCMATCH_STORED_LOCALES) {
  525. remembered = current;
  526. } else {
  527. // ULOCMATCH_TEMPORARY_LOCALES
  528. delete remembered;
  529. remembered = new Locale(*current);
  530. if (remembered == nullptr) {
  531. errorCode = U_MEMORY_ALLOCATION_ERROR;
  532. }
  533. }
  534. }
  535. const Locale *orphanRemembered() {
  536. const Locale *rem = remembered;
  537. remembered = nullptr;
  538. return rem;
  539. }
  540. int32_t getBestDesiredIndex() const {
  541. return bestDesiredIndex;
  542. }
  543. private:
  544. const LikelySubtags &likelySubtags;
  545. Locale::Iterator &locales;
  546. ULocMatchLifetime lifetime;
  547. const Locale *current = nullptr, *remembered = nullptr;
  548. int32_t bestDesiredIndex = -1;
  549. };
  550. const Locale *LocaleMatcher::getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const {
  551. if (U_FAILURE(errorCode)) { return nullptr; }
  552. std::optional<int32_t> suppIndex = getBestSuppIndex(
  553. getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
  554. nullptr, errorCode);
  555. return U_SUCCESS(errorCode) && suppIndex.has_value() ? supportedLocales[*suppIndex]
  556. : defaultLocale;
  557. }
  558. const Locale *LocaleMatcher::getBestMatch(Locale::Iterator &desiredLocales,
  559. UErrorCode &errorCode) const {
  560. if (U_FAILURE(errorCode)) { return nullptr; }
  561. if (!desiredLocales.hasNext()) {
  562. return defaultLocale;
  563. }
  564. LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
  565. std::optional<int32_t> suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
  566. return U_SUCCESS(errorCode) && suppIndex.has_value() ? supportedLocales[*suppIndex]
  567. : defaultLocale;
  568. }
  569. const Locale *LocaleMatcher::getBestMatchForListString(
  570. StringPiece desiredLocaleList, UErrorCode &errorCode) const {
  571. if (U_FAILURE(errorCode)) { return nullptr; }
  572. LocalePriorityList list(desiredLocaleList, errorCode);
  573. LocalePriorityList::Iterator iter = list.iterator();
  574. return getBestMatch(iter, errorCode);
  575. }
  576. LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
  577. const Locale &desiredLocale, UErrorCode &errorCode) const {
  578. if (U_FAILURE(errorCode)) {
  579. return Result(nullptr, defaultLocale, -1, -1, false);
  580. }
  581. std::optional<int32_t> suppIndex = getBestSuppIndex(
  582. getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
  583. nullptr, errorCode);
  584. if (U_FAILURE(errorCode) || !suppIndex.has_value()) {
  585. return Result(nullptr, defaultLocale, -1, -1, false);
  586. } else {
  587. return Result(&desiredLocale, supportedLocales[*suppIndex], 0, *suppIndex, false);
  588. }
  589. }
  590. LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
  591. Locale::Iterator &desiredLocales, UErrorCode &errorCode) const {
  592. if (U_FAILURE(errorCode) || !desiredLocales.hasNext()) {
  593. return Result(nullptr, defaultLocale, -1, -1, false);
  594. }
  595. LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
  596. std::optional<int32_t> suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
  597. if (U_FAILURE(errorCode) || !suppIndex.has_value()) {
  598. return Result(nullptr, defaultLocale, -1, -1, false);
  599. } else {
  600. return Result(lsrIter.orphanRemembered(), supportedLocales[*suppIndex],
  601. lsrIter.getBestDesiredIndex(), *suppIndex, true);
  602. }
  603. }
  604. std::optional<int32_t> LocaleMatcher::getBestSuppIndex(LSR desiredLSR,
  605. LocaleLsrIterator *remainingIter,
  606. UErrorCode &errorCode) const {
  607. if (U_FAILURE(errorCode)) { return std::nullopt; }
  608. int32_t desiredIndex = 0;
  609. int32_t bestSupportedLsrIndex = -1;
  610. for (int32_t bestShiftedDistance = LocaleDistance::shiftDistance(thresholdDistance);;) {
  611. // Quick check for exact maximized LSR.
  612. if (supportedLsrToIndex != nullptr) {
  613. desiredLSR.setHashCode();
  614. UBool found = false;
  615. int32_t suppIndex = uhash_getiAndFound(supportedLsrToIndex, &desiredLSR, &found);
  616. if (found) {
  617. if (remainingIter != nullptr) {
  618. remainingIter->rememberCurrent(desiredIndex, errorCode);
  619. }
  620. return suppIndex;
  621. }
  622. }
  623. int32_t bestIndexAndDistance = localeDistance.getBestIndexAndDistance(
  624. desiredLSR, supportedLSRs, supportedLSRsLength,
  625. bestShiftedDistance, favorSubtag, direction);
  626. if (bestIndexAndDistance >= 0) {
  627. bestShiftedDistance = LocaleDistance::getShiftedDistance(bestIndexAndDistance);
  628. if (remainingIter != nullptr) {
  629. remainingIter->rememberCurrent(desiredIndex, errorCode);
  630. if (U_FAILURE(errorCode)) { return std::nullopt; }
  631. }
  632. bestSupportedLsrIndex = LocaleDistance::getIndex(bestIndexAndDistance);
  633. }
  634. if ((bestShiftedDistance -= LocaleDistance::shiftDistance(demotionPerDesiredLocale)) <= 0) {
  635. break;
  636. }
  637. if (remainingIter == nullptr || !remainingIter->hasNext()) {
  638. break;
  639. }
  640. desiredLSR = remainingIter->next(errorCode);
  641. if (U_FAILURE(errorCode)) { return std::nullopt; }
  642. ++desiredIndex;
  643. }
  644. if (bestSupportedLsrIndex < 0) {
  645. // no good match
  646. return std::nullopt;
  647. }
  648. return supportedIndexes[bestSupportedLsrIndex];
  649. }
  650. UBool LocaleMatcher::isMatch(const Locale &desired, const Locale &supported,
  651. UErrorCode &errorCode) const {
  652. if (U_FAILURE(errorCode)) { return false; }
  653. LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
  654. if (U_FAILURE(errorCode)) { return false; }
  655. const LSR *pSuppLSR = &suppLSR;
  656. int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
  657. getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
  658. &pSuppLSR, 1,
  659. LocaleDistance::shiftDistance(thresholdDistance), favorSubtag, direction);
  660. return indexAndDistance >= 0;
  661. }
  662. double LocaleMatcher::internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const {
  663. if (U_FAILURE(errorCode)) { return 0.; }
  664. // Returns the inverse of the distance: That is, 1-distance(desired, supported).
  665. LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
  666. if (U_FAILURE(errorCode)) { return 0.; }
  667. const LSR *pSuppLSR = &suppLSR;
  668. int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
  669. getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
  670. &pSuppLSR, 1,
  671. LocaleDistance::shiftDistance(thresholdDistance), favorSubtag, direction);
  672. double distance = LocaleDistance::getDistanceDouble(indexAndDistance);
  673. return (100.0 - distance) / 100.0;
  674. }
  675. U_NAMESPACE_END
  676. // uloc_acceptLanguage() --------------------------------------------------- ***
  677. U_NAMESPACE_USE
  678. namespace {
  679. class LocaleFromTag {
  680. public:
  681. LocaleFromTag() : locale(Locale::getRoot()) {}
  682. const Locale &operator()(const char *tag) { return locale = Locale(tag); }
  683. private:
  684. // Store the locale in the converter, rather than return a reference to a temporary,
  685. // or a value which could go out of scope with the caller's reference to it.
  686. Locale locale;
  687. };
  688. int32_t acceptLanguage(UEnumeration &supportedLocales, Locale::Iterator &desiredLocales,
  689. char *dest, int32_t capacity, UAcceptResult *acceptResult,
  690. UErrorCode &errorCode) {
  691. if (U_FAILURE(errorCode)) { return 0; }
  692. LocaleMatcher::Builder builder;
  693. const char *locString;
  694. while ((locString = uenum_next(&supportedLocales, nullptr, &errorCode)) != nullptr) {
  695. Locale loc(locString);
  696. if (loc.isBogus()) {
  697. errorCode = U_ILLEGAL_ARGUMENT_ERROR;
  698. return 0;
  699. }
  700. builder.addSupportedLocale(loc);
  701. }
  702. LocaleMatcher matcher = builder.build(errorCode);
  703. LocaleMatcher::Result result = matcher.getBestMatchResult(desiredLocales, errorCode);
  704. if (U_FAILURE(errorCode)) { return 0; }
  705. if (result.getDesiredIndex() >= 0) {
  706. if (acceptResult != nullptr) {
  707. *acceptResult = *result.getDesiredLocale() == *result.getSupportedLocale() ?
  708. ULOC_ACCEPT_VALID : ULOC_ACCEPT_FALLBACK;
  709. }
  710. const char *bestStr = result.getSupportedLocale()->getName();
  711. int32_t bestLength = static_cast<int32_t>(uprv_strlen(bestStr));
  712. if (bestLength <= capacity) {
  713. uprv_memcpy(dest, bestStr, bestLength);
  714. }
  715. return u_terminateChars(dest, capacity, bestLength, &errorCode);
  716. } else {
  717. if (acceptResult != nullptr) {
  718. *acceptResult = ULOC_ACCEPT_FAILED;
  719. }
  720. return u_terminateChars(dest, capacity, 0, &errorCode);
  721. }
  722. }
  723. } // namespace
  724. U_CAPI int32_t U_EXPORT2
  725. uloc_acceptLanguage(char *result, int32_t resultAvailable,
  726. UAcceptResult *outResult,
  727. const char **acceptList, int32_t acceptListCount,
  728. UEnumeration *availableLocales,
  729. UErrorCode *status) {
  730. if (U_FAILURE(*status)) { return 0; }
  731. if ((result == nullptr ? resultAvailable != 0 : resultAvailable < 0) ||
  732. (acceptList == nullptr ? acceptListCount != 0 : acceptListCount < 0) ||
  733. availableLocales == nullptr) {
  734. *status = U_ILLEGAL_ARGUMENT_ERROR;
  735. return 0;
  736. }
  737. LocaleFromTag converter;
  738. Locale::ConvertingIterator<const char **, LocaleFromTag> desiredLocales(
  739. acceptList, acceptList + acceptListCount, converter);
  740. return acceptLanguage(*availableLocales, desiredLocales,
  741. result, resultAvailable, outResult, *status);
  742. }
  743. U_CAPI int32_t U_EXPORT2
  744. uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable,
  745. UAcceptResult *outResult,
  746. const char *httpAcceptLanguage,
  747. UEnumeration *availableLocales,
  748. UErrorCode *status) {
  749. if (U_FAILURE(*status)) { return 0; }
  750. if ((result == nullptr ? resultAvailable != 0 : resultAvailable < 0) ||
  751. httpAcceptLanguage == nullptr || availableLocales == nullptr) {
  752. *status = U_ILLEGAL_ARGUMENT_ERROR;
  753. return 0;
  754. }
  755. LocalePriorityList list(httpAcceptLanguage, *status);
  756. LocalePriorityList::Iterator desiredLocales = list.iterator();
  757. return acceptLanguage(*availableLocales, desiredLocales,
  758. result, resultAvailable, outResult, *status);
  759. }