localebuilder.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. // © 2019 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. #include <optional>
  4. #include <string_view>
  5. #include <utility>
  6. #include "bytesinkutil.h" // StringByteSink<CharString>
  7. #include "charstr.h"
  8. #include "cstring.h"
  9. #include "ulocimp.h"
  10. #include "unicode/localebuilder.h"
  11. #include "unicode/locid.h"
  12. namespace {
  13. inline bool UPRV_ISDIGIT(char c) { return c >= '0' && c <= '9'; }
  14. inline bool UPRV_ISALPHANUM(char c) { return uprv_isASCIILetter(c) || UPRV_ISDIGIT(c); }
  15. constexpr const char* kAttributeKey = "attribute";
  16. bool _isExtensionSubtags(char key, const char* s, int32_t len) {
  17. switch (uprv_tolower(key)) {
  18. case 'u':
  19. return ultag_isUnicodeExtensionSubtags(s, len);
  20. case 't':
  21. return ultag_isTransformedExtensionSubtags(s, len);
  22. case 'x':
  23. return ultag_isPrivateuseValueSubtags(s, len);
  24. default:
  25. return ultag_isExtensionSubtags(s, len);
  26. }
  27. }
  28. } // namespace
  29. U_NAMESPACE_BEGIN
  30. LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR), language_(),
  31. script_(), region_(), variant_(nullptr), extensions_(nullptr)
  32. {
  33. language_[0] = 0;
  34. script_[0] = 0;
  35. region_[0] = 0;
  36. }
  37. LocaleBuilder::~LocaleBuilder()
  38. {
  39. delete variant_;
  40. delete extensions_;
  41. }
  42. LocaleBuilder& LocaleBuilder::setLocale(const Locale& locale)
  43. {
  44. clear();
  45. setLanguage(locale.getLanguage());
  46. setScript(locale.getScript());
  47. setRegion(locale.getCountry());
  48. setVariant(locale.getVariant());
  49. extensions_ = locale.clone();
  50. if (extensions_ == nullptr) {
  51. status_ = U_MEMORY_ALLOCATION_ERROR;
  52. }
  53. return *this;
  54. }
  55. LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag)
  56. {
  57. Locale l = Locale::forLanguageTag(tag, status_);
  58. if (U_FAILURE(status_)) { return *this; }
  59. // Because setLocale will reset status_ we need to return
  60. // first if we have error in forLanguageTag.
  61. setLocale(l);
  62. return *this;
  63. }
  64. namespace {
  65. void setField(StringPiece input, char* dest, UErrorCode& errorCode,
  66. bool (*test)(const char*, int32_t)) {
  67. if (U_FAILURE(errorCode)) { return; }
  68. if (input.empty()) {
  69. dest[0] = '\0';
  70. } else if (test(input.data(), input.length())) {
  71. uprv_memcpy(dest, input.data(), input.length());
  72. dest[input.length()] = '\0';
  73. } else {
  74. errorCode = U_ILLEGAL_ARGUMENT_ERROR;
  75. }
  76. }
  77. } // namespace
  78. LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language)
  79. {
  80. setField(language, language_, status_, &ultag_isLanguageSubtag);
  81. return *this;
  82. }
  83. LocaleBuilder& LocaleBuilder::setScript(StringPiece script)
  84. {
  85. setField(script, script_, status_, &ultag_isScriptSubtag);
  86. return *this;
  87. }
  88. LocaleBuilder& LocaleBuilder::setRegion(StringPiece region)
  89. {
  90. setField(region, region_, status_, &ultag_isRegionSubtag);
  91. return *this;
  92. }
  93. namespace {
  94. void transform(char* data, int32_t len) {
  95. for (int32_t i = 0; i < len; i++, data++) {
  96. if (*data == '_') {
  97. *data = '-';
  98. } else {
  99. *data = uprv_tolower(*data);
  100. }
  101. }
  102. }
  103. } // namespace
  104. LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant)
  105. {
  106. if (U_FAILURE(status_)) { return *this; }
  107. if (variant.empty()) {
  108. delete variant_;
  109. variant_ = nullptr;
  110. return *this;
  111. }
  112. CharString* new_variant = new CharString(variant, status_);
  113. if (U_FAILURE(status_)) { return *this; }
  114. if (new_variant == nullptr) {
  115. status_ = U_MEMORY_ALLOCATION_ERROR;
  116. return *this;
  117. }
  118. transform(new_variant->data(), new_variant->length());
  119. if (!ultag_isVariantSubtags(new_variant->data(), new_variant->length())) {
  120. delete new_variant;
  121. status_ = U_ILLEGAL_ARGUMENT_ERROR;
  122. return *this;
  123. }
  124. delete variant_;
  125. variant_ = new_variant;
  126. return *this;
  127. }
  128. namespace {
  129. bool
  130. _isKeywordValue(const char* key, const char* value, int32_t value_len)
  131. {
  132. if (key[1] == '\0') {
  133. // one char key
  134. return (UPRV_ISALPHANUM(uprv_tolower(key[0])) &&
  135. _isExtensionSubtags(key[0], value, value_len));
  136. } else if (uprv_strcmp(key, kAttributeKey) == 0) {
  137. // unicode attributes
  138. return ultag_isUnicodeLocaleAttributes(value, value_len);
  139. }
  140. // otherwise: unicode extension value
  141. // We need to convert from legacy key/value to unicode
  142. // key/value
  143. std::optional<std::string_view> unicode_locale_key = ulocimp_toBcpKeyWithFallback(key);
  144. std::optional<std::string_view> unicode_locale_type = ulocimp_toBcpTypeWithFallback(key, value);
  145. return unicode_locale_key.has_value() &&
  146. unicode_locale_type.has_value() &&
  147. ultag_isUnicodeLocaleKey(unicode_locale_key->data(),
  148. static_cast<int32_t>(unicode_locale_key->size())) &&
  149. ultag_isUnicodeLocaleType(unicode_locale_type->data(),
  150. static_cast<int32_t>(unicode_locale_type->size()));
  151. }
  152. void
  153. _copyExtensions(const Locale& from, icu::StringEnumeration *keywords,
  154. Locale& to, bool validate, UErrorCode& errorCode)
  155. {
  156. if (U_FAILURE(errorCode)) { return; }
  157. LocalPointer<icu::StringEnumeration> ownedKeywords;
  158. if (keywords == nullptr) {
  159. ownedKeywords.adoptInstead(from.createKeywords(errorCode));
  160. if (U_FAILURE(errorCode) || ownedKeywords.isNull()) { return; }
  161. keywords = ownedKeywords.getAlias();
  162. }
  163. const char* key;
  164. while ((key = keywords->next(nullptr, errorCode)) != nullptr) {
  165. auto value = from.getKeywordValue<CharString>(key, errorCode);
  166. if (U_FAILURE(errorCode)) { return; }
  167. if (uprv_strcmp(key, kAttributeKey) == 0) {
  168. transform(value.data(), value.length());
  169. }
  170. if (validate &&
  171. !_isKeywordValue(key, value.data(), value.length())) {
  172. errorCode = U_ILLEGAL_ARGUMENT_ERROR;
  173. return;
  174. }
  175. to.setKeywordValue(key, value.data(), errorCode);
  176. if (U_FAILURE(errorCode)) { return; }
  177. }
  178. }
  179. void
  180. _clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode)
  181. {
  182. if (U_FAILURE(errorCode)) { return; }
  183. // Clear Unicode attributes
  184. locale.setKeywordValue(kAttributeKey, "", errorCode);
  185. // Clear all Unicode keyword values
  186. LocalPointer<icu::StringEnumeration> iter(locale.createUnicodeKeywords(errorCode));
  187. if (U_FAILURE(errorCode) || iter.isNull()) { return; }
  188. const char* key;
  189. while ((key = iter->next(nullptr, errorCode)) != nullptr) {
  190. locale.setUnicodeKeywordValue(key, nullptr, errorCode);
  191. }
  192. }
  193. void
  194. _setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode)
  195. {
  196. if (U_FAILURE(errorCode)) { return; }
  197. // Add the unicode extensions to extensions_
  198. CharString locale_str("und-u-", errorCode);
  199. locale_str.append(value, errorCode);
  200. _copyExtensions(
  201. Locale::forLanguageTag(locale_str.data(), errorCode), nullptr,
  202. locale, false, errorCode);
  203. }
  204. } // namespace
  205. LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
  206. {
  207. if (U_FAILURE(status_)) { return *this; }
  208. if (!UPRV_ISALPHANUM(key)) {
  209. status_ = U_ILLEGAL_ARGUMENT_ERROR;
  210. return *this;
  211. }
  212. CharString value_str(value, status_);
  213. if (U_FAILURE(status_)) { return *this; }
  214. transform(value_str.data(), value_str.length());
  215. if (!value_str.isEmpty() &&
  216. !_isExtensionSubtags(key, value_str.data(), value_str.length())) {
  217. status_ = U_ILLEGAL_ARGUMENT_ERROR;
  218. return *this;
  219. }
  220. if (extensions_ == nullptr) {
  221. extensions_ = Locale::getRoot().clone();
  222. if (extensions_ == nullptr) {
  223. status_ = U_MEMORY_ALLOCATION_ERROR;
  224. return *this;
  225. }
  226. }
  227. if (uprv_tolower(key) != 'u') {
  228. // for t, x and others extension.
  229. extensions_->setKeywordValue(StringPiece(&key, 1), value_str.data(),
  230. status_);
  231. return *this;
  232. }
  233. _clearUAttributesAndKeyType(*extensions_, status_);
  234. if (U_FAILURE(status_)) { return *this; }
  235. if (!value.empty()) {
  236. _setUnicodeExtensions(*extensions_, value_str, status_);
  237. }
  238. return *this;
  239. }
  240. LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword(
  241. StringPiece key, StringPiece type)
  242. {
  243. if (U_FAILURE(status_)) { return *this; }
  244. if (!ultag_isUnicodeLocaleKey(key.data(), key.length()) ||
  245. (!type.empty() &&
  246. !ultag_isUnicodeLocaleType(type.data(), type.length()))) {
  247. status_ = U_ILLEGAL_ARGUMENT_ERROR;
  248. return *this;
  249. }
  250. if (extensions_ == nullptr) {
  251. extensions_ = Locale::getRoot().clone();
  252. if (extensions_ == nullptr) {
  253. status_ = U_MEMORY_ALLOCATION_ERROR;
  254. return *this;
  255. }
  256. }
  257. extensions_->setUnicodeKeywordValue(key, type, status_);
  258. return *this;
  259. }
  260. LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute(
  261. StringPiece value)
  262. {
  263. CharString value_str(value, status_);
  264. if (U_FAILURE(status_)) { return *this; }
  265. transform(value_str.data(), value_str.length());
  266. if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
  267. status_ = U_ILLEGAL_ARGUMENT_ERROR;
  268. return *this;
  269. }
  270. if (extensions_ == nullptr) {
  271. extensions_ = Locale::getRoot().clone();
  272. if (extensions_ == nullptr) {
  273. status_ = U_MEMORY_ALLOCATION_ERROR;
  274. return *this;
  275. }
  276. extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_);
  277. return *this;
  278. }
  279. UErrorCode localErrorCode = U_ZERO_ERROR;
  280. auto attributes = extensions_->getKeywordValue<CharString>(kAttributeKey, localErrorCode);
  281. if (U_FAILURE(localErrorCode)) {
  282. CharString new_attributes(value_str.data(), status_);
  283. // No attributes, set the attribute.
  284. extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
  285. return *this;
  286. }
  287. transform(attributes.data(),attributes.length());
  288. const char* start = attributes.data();
  289. const char* limit = attributes.data() + attributes.length();
  290. CharString new_attributes;
  291. bool inserted = false;
  292. while (start < limit) {
  293. if (!inserted) {
  294. int cmp = uprv_strcmp(start, value_str.data());
  295. if (cmp == 0) { return *this; } // Found it in attributes: Just return
  296. if (cmp > 0) {
  297. if (!new_attributes.isEmpty()) new_attributes.append('_', status_);
  298. new_attributes.append(value_str.data(), status_);
  299. inserted = true;
  300. }
  301. }
  302. if (!new_attributes.isEmpty()) {
  303. new_attributes.append('_', status_);
  304. }
  305. new_attributes.append(start, status_);
  306. start += uprv_strlen(start) + 1;
  307. }
  308. if (!inserted) {
  309. if (!new_attributes.isEmpty()) {
  310. new_attributes.append('_', status_);
  311. }
  312. new_attributes.append(value_str.data(), status_);
  313. }
  314. // Not yet in the attributes, set the attribute.
  315. extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
  316. return *this;
  317. }
  318. LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute(
  319. StringPiece value)
  320. {
  321. CharString value_str(value, status_);
  322. if (U_FAILURE(status_)) { return *this; }
  323. transform(value_str.data(), value_str.length());
  324. if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
  325. status_ = U_ILLEGAL_ARGUMENT_ERROR;
  326. return *this;
  327. }
  328. if (extensions_ == nullptr) { return *this; }
  329. UErrorCode localErrorCode = U_ZERO_ERROR;
  330. auto attributes = extensions_->getKeywordValue<CharString>(kAttributeKey, localErrorCode);
  331. // get failure, just return
  332. if (U_FAILURE(localErrorCode)) { return *this; }
  333. // Do not have any attributes, just return.
  334. if (attributes.isEmpty()) { return *this; }
  335. char* p = attributes.data();
  336. // Replace null terminiator in place for _ and - so later
  337. // we can use uprv_strcmp to compare.
  338. for (int32_t i = 0; i < attributes.length(); i++, p++) {
  339. *p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p);
  340. }
  341. const char* start = attributes.data();
  342. const char* limit = attributes.data() + attributes.length();
  343. CharString new_attributes;
  344. bool found = false;
  345. while (start < limit) {
  346. if (uprv_strcmp(start, value_str.data()) == 0) {
  347. found = true;
  348. } else {
  349. if (!new_attributes.isEmpty()) {
  350. new_attributes.append('_', status_);
  351. }
  352. new_attributes.append(start, status_);
  353. }
  354. start += uprv_strlen(start) + 1;
  355. }
  356. // Found the value in attributes, set the attribute.
  357. if (found) {
  358. extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
  359. }
  360. return *this;
  361. }
  362. LocaleBuilder& LocaleBuilder::clear()
  363. {
  364. status_ = U_ZERO_ERROR;
  365. language_[0] = 0;
  366. script_[0] = 0;
  367. region_[0] = 0;
  368. delete variant_;
  369. variant_ = nullptr;
  370. clearExtensions();
  371. return *this;
  372. }
  373. LocaleBuilder& LocaleBuilder::clearExtensions()
  374. {
  375. delete extensions_;
  376. extensions_ = nullptr;
  377. return *this;
  378. }
  379. Locale makeBogusLocale() {
  380. Locale bogus;
  381. bogus.setToBogus();
  382. return bogus;
  383. }
  384. void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode)
  385. {
  386. if (U_FAILURE(errorCode)) { return; }
  387. LocalPointer<icu::StringEnumeration> keywords(src.createKeywords(errorCode));
  388. if (U_FAILURE(errorCode) || keywords.isNull() || keywords->count(errorCode) == 0) {
  389. // Error, or no extensions to copy.
  390. return;
  391. }
  392. if (extensions_ == nullptr) {
  393. extensions_ = Locale::getRoot().clone();
  394. if (extensions_ == nullptr) {
  395. status_ = U_MEMORY_ALLOCATION_ERROR;
  396. return;
  397. }
  398. }
  399. _copyExtensions(src, keywords.getAlias(), *extensions_, false, errorCode);
  400. }
  401. Locale LocaleBuilder::build(UErrorCode& errorCode)
  402. {
  403. if (U_FAILURE(errorCode)) {
  404. return makeBogusLocale();
  405. }
  406. if (U_FAILURE(status_)) {
  407. errorCode = status_;
  408. return makeBogusLocale();
  409. }
  410. CharString locale_str(language_, errorCode);
  411. if (uprv_strlen(script_) > 0) {
  412. locale_str.append('-', errorCode).append(StringPiece(script_), errorCode);
  413. }
  414. if (uprv_strlen(region_) > 0) {
  415. locale_str.append('-', errorCode).append(StringPiece(region_), errorCode);
  416. }
  417. if (variant_ != nullptr) {
  418. locale_str.append('-', errorCode).append(StringPiece(variant_->data()), errorCode);
  419. }
  420. if (U_FAILURE(errorCode)) {
  421. return makeBogusLocale();
  422. }
  423. Locale product(locale_str.data());
  424. if (extensions_ != nullptr) {
  425. _copyExtensions(*extensions_, nullptr, product, true, errorCode);
  426. }
  427. if (U_FAILURE(errorCode)) {
  428. return makeBogusLocale();
  429. }
  430. return product;
  431. }
  432. UBool LocaleBuilder::copyErrorTo(UErrorCode &outErrorCode) const {
  433. if (U_FAILURE(outErrorCode)) {
  434. // Do not overwrite the older error code
  435. return true;
  436. }
  437. outErrorCode = status_;
  438. return U_FAILURE(outErrorCode);
  439. }
  440. U_NAMESPACE_END