localebuilder.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468
  1. // © 2019 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. #include <utility>
  4. #include "bytesinkutil.h" // CharStringByteSink
  5. #include "charstr.h"
  6. #include "cstring.h"
  7. #include "ulocimp.h"
  8. #include "unicode/localebuilder.h"
  9. #include "unicode/locid.h"
  10. U_NAMESPACE_BEGIN
  // True when c is one of the ASCII digits '0' through '9'.
  // The argument is expanded twice, so it must be free of side effects.
  #define UPRV_ISDIGIT(c) ((c) >= '0' && (c) <= '9')
  // True when c is an ASCII letter (per uprv_isASCIILetter) or an ASCII digit.
  // The argument is expanded more than once, so it must be free of side effects.
  #define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c))
  // Keyword name under which this file reads and writes Unicode locale
  // attributes on the extensions Locale.
  constexpr const char* kAttributeKey = "attribute";
  14. static bool _isExtensionSubtags(char key, const char* s, int32_t len) {
  15. switch (uprv_tolower(key)) {
  16. case 'u':
  17. return ultag_isUnicodeExtensionSubtags(s, len);
  18. case 't':
  19. return ultag_isTransformedExtensionSubtags(s, len);
  20. case 'x':
  21. return ultag_isPrivateuseValueSubtags(s, len);
  22. default:
  23. return ultag_isExtensionSubtags(s, len);
  24. }
  25. }
  26. LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR), language_(),
  27. script_(), region_(), variant_(nullptr), extensions_(nullptr)
  28. {
  29. language_[0] = 0;
  30. script_[0] = 0;
  31. region_[0] = 0;
  32. }
  33. LocaleBuilder::~LocaleBuilder()
  34. {
  35. delete variant_;
  36. delete extensions_;
  37. }
  38. LocaleBuilder& LocaleBuilder::setLocale(const Locale& locale)
  39. {
  40. clear();
  41. setLanguage(locale.getLanguage());
  42. setScript(locale.getScript());
  43. setRegion(locale.getCountry());
  44. setVariant(locale.getVariant());
  45. extensions_ = locale.clone();
  46. if (extensions_ == nullptr) {
  47. status_ = U_MEMORY_ALLOCATION_ERROR;
  48. }
  49. return *this;
  50. }
  51. LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag)
  52. {
  53. Locale l = Locale::forLanguageTag(tag, status_);
  54. if (U_FAILURE(status_)) { return *this; }
  55. // Because setLocale will reset status_ we need to return
  56. // first if we have error in forLanguageTag.
  57. setLocale(l);
  58. return *this;
  59. }
  60. static void setField(StringPiece input, char* dest, UErrorCode& errorCode,
  61. UBool (*test)(const char*, int32_t)) {
  62. if (U_FAILURE(errorCode)) { return; }
  63. if (input.empty()) {
  64. dest[0] = '\0';
  65. } else if (test(input.data(), input.length())) {
  66. uprv_memcpy(dest, input.data(), input.length());
  67. dest[input.length()] = '\0';
  68. } else {
  69. errorCode = U_ILLEGAL_ARGUMENT_ERROR;
  70. }
  71. }
  72. LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language)
  73. {
  74. setField(language, language_, status_, &ultag_isLanguageSubtag);
  75. return *this;
  76. }
  77. LocaleBuilder& LocaleBuilder::setScript(StringPiece script)
  78. {
  79. setField(script, script_, status_, &ultag_isScriptSubtag);
  80. return *this;
  81. }
  82. LocaleBuilder& LocaleBuilder::setRegion(StringPiece region)
  83. {
  84. setField(region, region_, status_, &ultag_isRegionSubtag);
  85. return *this;
  86. }
  87. static void transform(char* data, int32_t len) {
  88. for (int32_t i = 0; i < len; i++, data++) {
  89. if (*data == '_') {
  90. *data = '-';
  91. } else {
  92. *data = uprv_tolower(*data);
  93. }
  94. }
  95. }
  96. LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant)
  97. {
  98. if (U_FAILURE(status_)) { return *this; }
  99. if (variant.empty()) {
  100. delete variant_;
  101. variant_ = nullptr;
  102. return *this;
  103. }
  104. CharString* new_variant = new CharString(variant, status_);
  105. if (U_FAILURE(status_)) { return *this; }
  106. if (new_variant == nullptr) {
  107. status_ = U_MEMORY_ALLOCATION_ERROR;
  108. return *this;
  109. }
  110. transform(new_variant->data(), new_variant->length());
  111. if (!ultag_isVariantSubtags(new_variant->data(), new_variant->length())) {
  112. delete new_variant;
  113. status_ = U_ILLEGAL_ARGUMENT_ERROR;
  114. return *this;
  115. }
  116. delete variant_;
  117. variant_ = new_variant;
  118. return *this;
  119. }
  120. static bool
  121. _isKeywordValue(const char* key, const char* value, int32_t value_len)
  122. {
  123. if (key[1] == '\0') {
  124. // one char key
  125. return (UPRV_ISALPHANUM(uprv_tolower(key[0])) &&
  126. _isExtensionSubtags(key[0], value, value_len));
  127. } else if (uprv_strcmp(key, kAttributeKey) == 0) {
  128. // unicode attributes
  129. return ultag_isUnicodeLocaleAttributes(value, value_len);
  130. }
  131. // otherwise: unicode extension value
  132. // We need to convert from legacy key/value to unicode
  133. // key/value
  134. const char* unicode_locale_key = uloc_toUnicodeLocaleKey(key);
  135. const char* unicode_locale_type = uloc_toUnicodeLocaleType(key, value);
  136. return unicode_locale_key && unicode_locale_type &&
  137. ultag_isUnicodeLocaleKey(unicode_locale_key, -1) &&
  138. ultag_isUnicodeLocaleType(unicode_locale_type, -1);
  139. }
  140. static void
  141. _copyExtensions(const Locale& from, icu::StringEnumeration *keywords,
  142. Locale& to, bool validate, UErrorCode& errorCode)
  143. {
  144. if (U_FAILURE(errorCode)) { return; }
  145. LocalPointer<icu::StringEnumeration> ownedKeywords;
  146. if (keywords == nullptr) {
  147. ownedKeywords.adoptInstead(from.createKeywords(errorCode));
  148. if (U_FAILURE(errorCode) || ownedKeywords.isNull()) { return; }
  149. keywords = ownedKeywords.getAlias();
  150. }
  151. const char* key;
  152. while ((key = keywords->next(nullptr, errorCode)) != nullptr) {
  153. CharString value;
  154. CharStringByteSink sink(&value);
  155. from.getKeywordValue(key, sink, errorCode);
  156. if (U_FAILURE(errorCode)) { return; }
  157. if (uprv_strcmp(key, kAttributeKey) == 0) {
  158. transform(value.data(), value.length());
  159. }
  160. if (validate &&
  161. !_isKeywordValue(key, value.data(), value.length())) {
  162. errorCode = U_ILLEGAL_ARGUMENT_ERROR;
  163. return;
  164. }
  165. to.setKeywordValue(key, value.data(), errorCode);
  166. if (U_FAILURE(errorCode)) { return; }
  167. }
  168. }
  169. void static
  170. _clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode)
  171. {
  172. // Clear Unicode attributes
  173. locale.setKeywordValue(kAttributeKey, "", errorCode);
  174. // Clear all Unicode keyword values
  175. LocalPointer<icu::StringEnumeration> iter(locale.createUnicodeKeywords(errorCode));
  176. if (U_FAILURE(errorCode) || iter.isNull()) { return; }
  177. const char* key;
  178. while ((key = iter->next(nullptr, errorCode)) != nullptr) {
  179. locale.setUnicodeKeywordValue(key, nullptr, errorCode);
  180. }
  181. }
  182. static void
  183. _setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode)
  184. {
  185. // Add the unicode extensions to extensions_
  186. CharString locale_str("und-u-", errorCode);
  187. locale_str.append(value, errorCode);
  188. _copyExtensions(
  189. Locale::forLanguageTag(locale_str.data(), errorCode), nullptr,
  190. locale, false, errorCode);
  191. }
  192. LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
  193. {
  194. if (U_FAILURE(status_)) { return *this; }
  195. if (!UPRV_ISALPHANUM(key)) {
  196. status_ = U_ILLEGAL_ARGUMENT_ERROR;
  197. return *this;
  198. }
  199. CharString value_str(value, status_);
  200. if (U_FAILURE(status_)) { return *this; }
  201. transform(value_str.data(), value_str.length());
  202. if (!value_str.isEmpty() &&
  203. !_isExtensionSubtags(key, value_str.data(), value_str.length())) {
  204. status_ = U_ILLEGAL_ARGUMENT_ERROR;
  205. return *this;
  206. }
  207. if (extensions_ == nullptr) {
  208. extensions_ = Locale::getRoot().clone();
  209. if (extensions_ == nullptr) {
  210. status_ = U_MEMORY_ALLOCATION_ERROR;
  211. return *this;
  212. }
  213. }
  214. if (uprv_tolower(key) != 'u') {
  215. // for t, x and others extension.
  216. extensions_->setKeywordValue(StringPiece(&key, 1), value_str.data(),
  217. status_);
  218. return *this;
  219. }
  220. _clearUAttributesAndKeyType(*extensions_, status_);
  221. if (U_FAILURE(status_)) { return *this; }
  222. if (!value.empty()) {
  223. _setUnicodeExtensions(*extensions_, value_str, status_);
  224. }
  225. return *this;
  226. }
  227. LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword(
  228. StringPiece key, StringPiece type)
  229. {
  230. if (U_FAILURE(status_)) { return *this; }
  231. if (!ultag_isUnicodeLocaleKey(key.data(), key.length()) ||
  232. (!type.empty() &&
  233. !ultag_isUnicodeLocaleType(type.data(), type.length()))) {
  234. status_ = U_ILLEGAL_ARGUMENT_ERROR;
  235. return *this;
  236. }
  237. if (extensions_ == nullptr) {
  238. extensions_ = Locale::getRoot().clone();
  239. if (extensions_ == nullptr) {
  240. status_ = U_MEMORY_ALLOCATION_ERROR;
  241. return *this;
  242. }
  243. }
  244. extensions_->setUnicodeKeywordValue(key, type, status_);
  245. return *this;
  246. }
  247. LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute(
  248. StringPiece value)
  249. {
  250. CharString value_str(value, status_);
  251. if (U_FAILURE(status_)) { return *this; }
  252. transform(value_str.data(), value_str.length());
  253. if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
  254. status_ = U_ILLEGAL_ARGUMENT_ERROR;
  255. return *this;
  256. }
  257. if (extensions_ == nullptr) {
  258. extensions_ = Locale::getRoot().clone();
  259. if (extensions_ == nullptr) {
  260. status_ = U_MEMORY_ALLOCATION_ERROR;
  261. return *this;
  262. }
  263. extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_);
  264. return *this;
  265. }
  266. CharString attributes;
  267. CharStringByteSink sink(&attributes);
  268. UErrorCode localErrorCode = U_ZERO_ERROR;
  269. extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode);
  270. if (U_FAILURE(localErrorCode)) {
  271. CharString new_attributes(value_str.data(), status_);
  272. // No attributes, set the attribute.
  273. extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
  274. return *this;
  275. }
  276. transform(attributes.data(),attributes.length());
  277. const char* start = attributes.data();
  278. const char* limit = attributes.data() + attributes.length();
  279. CharString new_attributes;
  280. bool inserted = false;
  281. while (start < limit) {
  282. if (!inserted) {
  283. int cmp = uprv_strcmp(start, value_str.data());
  284. if (cmp == 0) { return *this; } // Found it in attributes: Just return
  285. if (cmp > 0) {
  286. if (!new_attributes.isEmpty()) new_attributes.append('_', status_);
  287. new_attributes.append(value_str.data(), status_);
  288. inserted = true;
  289. }
  290. }
  291. if (!new_attributes.isEmpty()) {
  292. new_attributes.append('_', status_);
  293. }
  294. new_attributes.append(start, status_);
  295. start += uprv_strlen(start) + 1;
  296. }
  297. if (!inserted) {
  298. if (!new_attributes.isEmpty()) {
  299. new_attributes.append('_', status_);
  300. }
  301. new_attributes.append(value_str.data(), status_);
  302. }
  303. // Not yet in the attributes, set the attribute.
  304. extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
  305. return *this;
  306. }
  307. LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute(
  308. StringPiece value)
  309. {
  310. CharString value_str(value, status_);
  311. if (U_FAILURE(status_)) { return *this; }
  312. transform(value_str.data(), value_str.length());
  313. if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
  314. status_ = U_ILLEGAL_ARGUMENT_ERROR;
  315. return *this;
  316. }
  317. if (extensions_ == nullptr) { return *this; }
  318. UErrorCode localErrorCode = U_ZERO_ERROR;
  319. CharString attributes;
  320. CharStringByteSink sink(&attributes);
  321. extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode);
  322. // get failure, just return
  323. if (U_FAILURE(localErrorCode)) { return *this; }
  324. // Do not have any attributes, just return.
  325. if (attributes.isEmpty()) { return *this; }
  326. char* p = attributes.data();
  327. // Replace null terminiator in place for _ and - so later
  328. // we can use uprv_strcmp to compare.
  329. for (int32_t i = 0; i < attributes.length(); i++, p++) {
  330. *p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p);
  331. }
  332. const char* start = attributes.data();
  333. const char* limit = attributes.data() + attributes.length();
  334. CharString new_attributes;
  335. bool found = false;
  336. while (start < limit) {
  337. if (uprv_strcmp(start, value_str.data()) == 0) {
  338. found = true;
  339. } else {
  340. if (!new_attributes.isEmpty()) {
  341. new_attributes.append('_', status_);
  342. }
  343. new_attributes.append(start, status_);
  344. }
  345. start += uprv_strlen(start) + 1;
  346. }
  347. // Found the value in attributes, set the attribute.
  348. if (found) {
  349. extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
  350. }
  351. return *this;
  352. }
  353. LocaleBuilder& LocaleBuilder::clear()
  354. {
  355. status_ = U_ZERO_ERROR;
  356. language_[0] = 0;
  357. script_[0] = 0;
  358. region_[0] = 0;
  359. delete variant_;
  360. variant_ = nullptr;
  361. clearExtensions();
  362. return *this;
  363. }
  364. LocaleBuilder& LocaleBuilder::clearExtensions()
  365. {
  366. delete extensions_;
  367. extensions_ = nullptr;
  368. return *this;
  369. }
  370. Locale makeBogusLocale() {
  371. Locale bogus;
  372. bogus.setToBogus();
  373. return bogus;
  374. }
  375. void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode)
  376. {
  377. if (U_FAILURE(errorCode)) { return; }
  378. LocalPointer<icu::StringEnumeration> keywords(src.createKeywords(errorCode));
  379. if (U_FAILURE(errorCode) || keywords.isNull() || keywords->count(errorCode) == 0) {
  380. // Error, or no extensions to copy.
  381. return;
  382. }
  383. if (extensions_ == nullptr) {
  384. extensions_ = Locale::getRoot().clone();
  385. if (extensions_ == nullptr) {
  386. status_ = U_MEMORY_ALLOCATION_ERROR;
  387. return;
  388. }
  389. }
  390. _copyExtensions(src, keywords.getAlias(), *extensions_, false, errorCode);
  391. }
  392. Locale LocaleBuilder::build(UErrorCode& errorCode)
  393. {
  394. if (U_FAILURE(errorCode)) {
  395. return makeBogusLocale();
  396. }
  397. if (U_FAILURE(status_)) {
  398. errorCode = status_;
  399. return makeBogusLocale();
  400. }
  401. CharString locale_str(language_, errorCode);
  402. if (uprv_strlen(script_) > 0) {
  403. locale_str.append('-', errorCode).append(StringPiece(script_), errorCode);
  404. }
  405. if (uprv_strlen(region_) > 0) {
  406. locale_str.append('-', errorCode).append(StringPiece(region_), errorCode);
  407. }
  408. if (variant_ != nullptr) {
  409. locale_str.append('-', errorCode).append(StringPiece(variant_->data()), errorCode);
  410. }
  411. if (U_FAILURE(errorCode)) {
  412. return makeBogusLocale();
  413. }
  414. Locale product(locale_str.data());
  415. if (extensions_ != nullptr) {
  416. _copyExtensions(*extensions_, nullptr, product, true, errorCode);
  417. }
  418. if (U_FAILURE(errorCode)) {
  419. return makeBogusLocale();
  420. }
  421. return product;
  422. }
  423. UBool LocaleBuilder::copyErrorTo(UErrorCode &outErrorCode) const {
  424. if (U_FAILURE(outErrorCode)) {
  425. // Do not overwrite the older error code
  426. return true;
  427. }
  428. outErrorCode = status_;
  429. return U_FAILURE(outErrorCode);
  430. }
  431. U_NAMESPACE_END