123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485 |
- // © 2019 and later: Unicode, Inc. and others.
- // License & terms of use: http://www.unicode.org/copyright.html
- #include <optional>
- #include <string_view>
- #include <utility>
- #include "bytesinkutil.h" // StringByteSink<CharString>
- #include "charstr.h"
- #include "cstring.h"
- #include "ulocimp.h"
- #include "unicode/localebuilder.h"
- #include "unicode/locid.h"
- namespace {
/** Returns true when `c` is one of the ASCII decimal digits '0' through '9'. */
inline bool UPRV_ISDIGIT(char c) {
    return !(c < '0' || c > '9');
}
- inline bool UPRV_ISALPHANUM(char c) { return uprv_isASCIILetter(c) || UPRV_ISDIGIT(c); }
- constexpr const char* kAttributeKey = "attribute";
- bool _isExtensionSubtags(char key, const char* s, int32_t len) {
- switch (uprv_tolower(key)) {
- case 'u':
- return ultag_isUnicodeExtensionSubtags(s, len);
- case 't':
- return ultag_isTransformedExtensionSubtags(s, len);
- case 'x':
- return ultag_isPrivateuseValueSubtags(s, len);
- default:
- return ultag_isExtensionSubtags(s, len);
- }
- }
- } // namespace
- U_NAMESPACE_BEGIN
- LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR), language_(),
- script_(), region_(), variant_(nullptr), extensions_(nullptr)
- {
- language_[0] = 0;
- script_[0] = 0;
- region_[0] = 0;
- }
- LocaleBuilder::~LocaleBuilder()
- {
- delete variant_;
- delete extensions_;
- }
- LocaleBuilder& LocaleBuilder::setLocale(const Locale& locale)
- {
- clear();
- setLanguage(locale.getLanguage());
- setScript(locale.getScript());
- setRegion(locale.getCountry());
- setVariant(locale.getVariant());
- extensions_ = locale.clone();
- if (extensions_ == nullptr) {
- status_ = U_MEMORY_ALLOCATION_ERROR;
- }
- return *this;
- }
- LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag)
- {
- Locale l = Locale::forLanguageTag(tag, status_);
- if (U_FAILURE(status_)) { return *this; }
- // Because setLocale will reset status_ we need to return
- // first if we have error in forLanguageTag.
- setLocale(l);
- return *this;
- }
- namespace {
- void setField(StringPiece input, char* dest, UErrorCode& errorCode,
- bool (*test)(const char*, int32_t)) {
- if (U_FAILURE(errorCode)) { return; }
- if (input.empty()) {
- dest[0] = '\0';
- } else if (test(input.data(), input.length())) {
- uprv_memcpy(dest, input.data(), input.length());
- dest[input.length()] = '\0';
- } else {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- }
- }
- } // namespace
- LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language)
- {
- setField(language, language_, status_, &ultag_isLanguageSubtag);
- return *this;
- }
- LocaleBuilder& LocaleBuilder::setScript(StringPiece script)
- {
- setField(script, script_, status_, &ultag_isScriptSubtag);
- return *this;
- }
- LocaleBuilder& LocaleBuilder::setRegion(StringPiece region)
- {
- setField(region, region_, status_, &ultag_isRegionSubtag);
- return *this;
- }
- namespace {
- void transform(char* data, int32_t len) {
- for (int32_t i = 0; i < len; i++, data++) {
- if (*data == '_') {
- *data = '-';
- } else {
- *data = uprv_tolower(*data);
- }
- }
- }
- } // namespace
- LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant)
- {
- if (U_FAILURE(status_)) { return *this; }
- if (variant.empty()) {
- delete variant_;
- variant_ = nullptr;
- return *this;
- }
- CharString* new_variant = new CharString(variant, status_);
- if (U_FAILURE(status_)) { return *this; }
- if (new_variant == nullptr) {
- status_ = U_MEMORY_ALLOCATION_ERROR;
- return *this;
- }
- transform(new_variant->data(), new_variant->length());
- if (!ultag_isVariantSubtags(new_variant->data(), new_variant->length())) {
- delete new_variant;
- status_ = U_ILLEGAL_ARGUMENT_ERROR;
- return *this;
- }
- delete variant_;
- variant_ = new_variant;
- return *this;
- }
- namespace {
- bool
- _isKeywordValue(const char* key, const char* value, int32_t value_len)
- {
- if (key[1] == '\0') {
- // one char key
- return (UPRV_ISALPHANUM(uprv_tolower(key[0])) &&
- _isExtensionSubtags(key[0], value, value_len));
- } else if (uprv_strcmp(key, kAttributeKey) == 0) {
- // unicode attributes
- return ultag_isUnicodeLocaleAttributes(value, value_len);
- }
- // otherwise: unicode extension value
- // We need to convert from legacy key/value to unicode
- // key/value
- std::optional<std::string_view> unicode_locale_key = ulocimp_toBcpKeyWithFallback(key);
- std::optional<std::string_view> unicode_locale_type = ulocimp_toBcpTypeWithFallback(key, value);
- return unicode_locale_key.has_value() &&
- unicode_locale_type.has_value() &&
- ultag_isUnicodeLocaleKey(unicode_locale_key->data(),
- static_cast<int32_t>(unicode_locale_key->size())) &&
- ultag_isUnicodeLocaleType(unicode_locale_type->data(),
- static_cast<int32_t>(unicode_locale_type->size()));
- }
- void
- _copyExtensions(const Locale& from, icu::StringEnumeration *keywords,
- Locale& to, bool validate, UErrorCode& errorCode)
- {
- if (U_FAILURE(errorCode)) { return; }
- LocalPointer<icu::StringEnumeration> ownedKeywords;
- if (keywords == nullptr) {
- ownedKeywords.adoptInstead(from.createKeywords(errorCode));
- if (U_FAILURE(errorCode) || ownedKeywords.isNull()) { return; }
- keywords = ownedKeywords.getAlias();
- }
- const char* key;
- while ((key = keywords->next(nullptr, errorCode)) != nullptr) {
- auto value = from.getKeywordValue<CharString>(key, errorCode);
- if (U_FAILURE(errorCode)) { return; }
- if (uprv_strcmp(key, kAttributeKey) == 0) {
- transform(value.data(), value.length());
- }
- if (validate &&
- !_isKeywordValue(key, value.data(), value.length())) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- to.setKeywordValue(key, value.data(), errorCode);
- if (U_FAILURE(errorCode)) { return; }
- }
- }
- void
- _clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode)
- {
- if (U_FAILURE(errorCode)) { return; }
- // Clear Unicode attributes
- locale.setKeywordValue(kAttributeKey, "", errorCode);
- // Clear all Unicode keyword values
- LocalPointer<icu::StringEnumeration> iter(locale.createUnicodeKeywords(errorCode));
- if (U_FAILURE(errorCode) || iter.isNull()) { return; }
- const char* key;
- while ((key = iter->next(nullptr, errorCode)) != nullptr) {
- locale.setUnicodeKeywordValue(key, nullptr, errorCode);
- }
- }
- void
- _setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode)
- {
- if (U_FAILURE(errorCode)) { return; }
- // Add the unicode extensions to extensions_
- CharString locale_str("und-u-", errorCode);
- locale_str.append(value, errorCode);
- _copyExtensions(
- Locale::forLanguageTag(locale_str.data(), errorCode), nullptr,
- locale, false, errorCode);
- }
- } // namespace
- LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
- {
- if (U_FAILURE(status_)) { return *this; }
- if (!UPRV_ISALPHANUM(key)) {
- status_ = U_ILLEGAL_ARGUMENT_ERROR;
- return *this;
- }
- CharString value_str(value, status_);
- if (U_FAILURE(status_)) { return *this; }
- transform(value_str.data(), value_str.length());
- if (!value_str.isEmpty() &&
- !_isExtensionSubtags(key, value_str.data(), value_str.length())) {
- status_ = U_ILLEGAL_ARGUMENT_ERROR;
- return *this;
- }
- if (extensions_ == nullptr) {
- extensions_ = Locale::getRoot().clone();
- if (extensions_ == nullptr) {
- status_ = U_MEMORY_ALLOCATION_ERROR;
- return *this;
- }
- }
- if (uprv_tolower(key) != 'u') {
- // for t, x and others extension.
- extensions_->setKeywordValue(StringPiece(&key, 1), value_str.data(),
- status_);
- return *this;
- }
- _clearUAttributesAndKeyType(*extensions_, status_);
- if (U_FAILURE(status_)) { return *this; }
- if (!value.empty()) {
- _setUnicodeExtensions(*extensions_, value_str, status_);
- }
- return *this;
- }
- LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword(
- StringPiece key, StringPiece type)
- {
- if (U_FAILURE(status_)) { return *this; }
- if (!ultag_isUnicodeLocaleKey(key.data(), key.length()) ||
- (!type.empty() &&
- !ultag_isUnicodeLocaleType(type.data(), type.length()))) {
- status_ = U_ILLEGAL_ARGUMENT_ERROR;
- return *this;
- }
- if (extensions_ == nullptr) {
- extensions_ = Locale::getRoot().clone();
- if (extensions_ == nullptr) {
- status_ = U_MEMORY_ALLOCATION_ERROR;
- return *this;
- }
- }
- extensions_->setUnicodeKeywordValue(key, type, status_);
- return *this;
- }
- LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute(
- StringPiece value)
- {
- CharString value_str(value, status_);
- if (U_FAILURE(status_)) { return *this; }
- transform(value_str.data(), value_str.length());
- if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
- status_ = U_ILLEGAL_ARGUMENT_ERROR;
- return *this;
- }
- if (extensions_ == nullptr) {
- extensions_ = Locale::getRoot().clone();
- if (extensions_ == nullptr) {
- status_ = U_MEMORY_ALLOCATION_ERROR;
- return *this;
- }
- extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_);
- return *this;
- }
- UErrorCode localErrorCode = U_ZERO_ERROR;
- auto attributes = extensions_->getKeywordValue<CharString>(kAttributeKey, localErrorCode);
- if (U_FAILURE(localErrorCode)) {
- CharString new_attributes(value_str.data(), status_);
- // No attributes, set the attribute.
- extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
- return *this;
- }
- transform(attributes.data(),attributes.length());
- const char* start = attributes.data();
- const char* limit = attributes.data() + attributes.length();
- CharString new_attributes;
- bool inserted = false;
- while (start < limit) {
- if (!inserted) {
- int cmp = uprv_strcmp(start, value_str.data());
- if (cmp == 0) { return *this; } // Found it in attributes: Just return
- if (cmp > 0) {
- if (!new_attributes.isEmpty()) new_attributes.append('_', status_);
- new_attributes.append(value_str.data(), status_);
- inserted = true;
- }
- }
- if (!new_attributes.isEmpty()) {
- new_attributes.append('_', status_);
- }
- new_attributes.append(start, status_);
- start += uprv_strlen(start) + 1;
- }
- if (!inserted) {
- if (!new_attributes.isEmpty()) {
- new_attributes.append('_', status_);
- }
- new_attributes.append(value_str.data(), status_);
- }
- // Not yet in the attributes, set the attribute.
- extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
- return *this;
- }
- LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute(
- StringPiece value)
- {
- CharString value_str(value, status_);
- if (U_FAILURE(status_)) { return *this; }
- transform(value_str.data(), value_str.length());
- if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
- status_ = U_ILLEGAL_ARGUMENT_ERROR;
- return *this;
- }
- if (extensions_ == nullptr) { return *this; }
- UErrorCode localErrorCode = U_ZERO_ERROR;
- auto attributes = extensions_->getKeywordValue<CharString>(kAttributeKey, localErrorCode);
- // get failure, just return
- if (U_FAILURE(localErrorCode)) { return *this; }
- // Do not have any attributes, just return.
- if (attributes.isEmpty()) { return *this; }
- char* p = attributes.data();
- // Replace null terminiator in place for _ and - so later
- // we can use uprv_strcmp to compare.
- for (int32_t i = 0; i < attributes.length(); i++, p++) {
- *p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p);
- }
- const char* start = attributes.data();
- const char* limit = attributes.data() + attributes.length();
- CharString new_attributes;
- bool found = false;
- while (start < limit) {
- if (uprv_strcmp(start, value_str.data()) == 0) {
- found = true;
- } else {
- if (!new_attributes.isEmpty()) {
- new_attributes.append('_', status_);
- }
- new_attributes.append(start, status_);
- }
- start += uprv_strlen(start) + 1;
- }
- // Found the value in attributes, set the attribute.
- if (found) {
- extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
- }
- return *this;
- }
- LocaleBuilder& LocaleBuilder::clear()
- {
- status_ = U_ZERO_ERROR;
- language_[0] = 0;
- script_[0] = 0;
- region_[0] = 0;
- delete variant_;
- variant_ = nullptr;
- clearExtensions();
- return *this;
- }
- LocaleBuilder& LocaleBuilder::clearExtensions()
- {
- delete extensions_;
- extensions_ = nullptr;
- return *this;
- }
- Locale makeBogusLocale() {
- Locale bogus;
- bogus.setToBogus();
- return bogus;
- }
- void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode)
- {
- if (U_FAILURE(errorCode)) { return; }
- LocalPointer<icu::StringEnumeration> keywords(src.createKeywords(errorCode));
- if (U_FAILURE(errorCode) || keywords.isNull() || keywords->count(errorCode) == 0) {
- // Error, or no extensions to copy.
- return;
- }
- if (extensions_ == nullptr) {
- extensions_ = Locale::getRoot().clone();
- if (extensions_ == nullptr) {
- status_ = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- }
- _copyExtensions(src, keywords.getAlias(), *extensions_, false, errorCode);
- }
- Locale LocaleBuilder::build(UErrorCode& errorCode)
- {
- if (U_FAILURE(errorCode)) {
- return makeBogusLocale();
- }
- if (U_FAILURE(status_)) {
- errorCode = status_;
- return makeBogusLocale();
- }
- CharString locale_str(language_, errorCode);
- if (uprv_strlen(script_) > 0) {
- locale_str.append('-', errorCode).append(StringPiece(script_), errorCode);
- }
- if (uprv_strlen(region_) > 0) {
- locale_str.append('-', errorCode).append(StringPiece(region_), errorCode);
- }
- if (variant_ != nullptr) {
- locale_str.append('-', errorCode).append(StringPiece(variant_->data()), errorCode);
- }
- if (U_FAILURE(errorCode)) {
- return makeBogusLocale();
- }
- Locale product(locale_str.data());
- if (extensions_ != nullptr) {
- _copyExtensions(*extensions_, nullptr, product, true, errorCode);
- }
- if (U_FAILURE(errorCode)) {
- return makeBogusLocale();
- }
- return product;
- }
- UBool LocaleBuilder::copyErrorTo(UErrorCode &outErrorCode) const {
- if (U_FAILURE(outErrorCode)) {
- // Do not overwrite the older error code
- return true;
- }
- outErrorCode = status_;
- return U_FAILURE(outErrorCode);
- }
- U_NAMESPACE_END
|