123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418 |
- // © 2016 and later: Unicode, Inc. and others.
- // License & terms of use: http://www.unicode.org/copyright.html
- /*
- *******************************************************************************
- * Copyright (C) 2014, International Business Machines
- * Corporation and others. All Rights Reserved.
- *******************************************************************************
- * loadednormalizer2impl.cpp
- *
- * created on: 2014sep03
- * created by: Markus W. Scherer
- */
- #include "unicode/utypes.h"
- #if !UCONFIG_NO_NORMALIZATION
- #include "unicode/udata.h"
- #include "unicode/localpointer.h"
- #include "unicode/normalizer2.h"
- #include "unicode/ucptrie.h"
- #include "unicode/unistr.h"
- #include "unicode/unorm.h"
- #include "cstring.h"
- #include "mutex.h"
- #include "norm2allmodes.h"
- #include "normalizer2impl.h"
- #include "uassert.h"
- #include "ucln_cmn.h"
- #include "uhash.h"
- U_NAMESPACE_BEGIN
- class LoadedNormalizer2Impl : public Normalizer2Impl {
- public:
- LoadedNormalizer2Impl() : memory(nullptr), ownedTrie(nullptr) {}
- virtual ~LoadedNormalizer2Impl();
- void load(const char *packageName, const char *name, UErrorCode &errorCode);
- private:
- static UBool U_CALLCONV
- isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
- UDataMemory *memory;
- UCPTrie *ownedTrie;
- };
- LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
- udata_close(memory);
- ucptrie_close(ownedTrie);
- }
- UBool U_CALLCONV
- LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
- const char * /* type */, const char * /*name*/,
- const UDataInfo *pInfo) {
- if(
- pInfo->size>=20 &&
- pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
- pInfo->charsetFamily==U_CHARSET_FAMILY &&
- pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */
- pInfo->dataFormat[1]==0x72 &&
- pInfo->dataFormat[2]==0x6d &&
- pInfo->dataFormat[3]==0x32 &&
- pInfo->formatVersion[0]==4
- ) {
- // Normalizer2Impl *me=(Normalizer2Impl *)context;
- // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
- return true;
- } else {
- return false;
- }
- }
- void
- LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return;
- }
- memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
- if(U_FAILURE(errorCode)) {
- return;
- }
- const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
- const int32_t *inIndexes=(const int32_t *)inBytes;
- int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
- if(indexesLength<=IX_MIN_LCCC_CP) {
- errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes.
- return;
- }
- int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
- int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
- ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16,
- inBytes+offset, nextOffset-offset, nullptr,
- &errorCode);
- if(U_FAILURE(errorCode)) {
- return;
- }
- offset=nextOffset;
- nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
- const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset);
- // smallFCD: new in formatVersion 2
- offset=nextOffset;
- const uint8_t *inSmallFCD=inBytes+offset;
- init(inIndexes, ownedTrie, inExtraData, inSmallFCD);
- }
- // instance cache ---------------------------------------------------------- ***
- Norm2AllModes *
- Norm2AllModes::createInstance(const char *packageName,
- const char *name,
- UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return nullptr;
- }
- LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl;
- if(impl==nullptr) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return nullptr;
- }
- impl->load(packageName, name, errorCode);
- return createInstance(impl, errorCode);
- }
- U_CDECL_BEGIN
- static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
- U_CDECL_END
- #if !NORM2_HARDCODE_NFC_DATA
- static Norm2AllModes *nfcSingleton;
- static icu::UInitOnce nfcInitOnce {};
- #endif
- static Norm2AllModes *nfkcSingleton;
- static icu::UInitOnce nfkcInitOnce {};
- static Norm2AllModes *nfkc_cfSingleton;
- static icu::UInitOnce nfkc_cfInitOnce {};
- static UHashtable *cache=nullptr;
- // UInitOnce singleton initialization function
- static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
- #if !NORM2_HARDCODE_NFC_DATA
- if (uprv_strcmp(what, "nfc") == 0) {
- nfcSingleton = Norm2AllModes::createInstance(nullptr, "nfc", errorCode);
- } else
- #endif
- if (uprv_strcmp(what, "nfkc") == 0) {
- nfkcSingleton = Norm2AllModes::createInstance(nullptr, "nfkc", errorCode);
- } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
- nfkc_cfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_cf", errorCode);
- } else {
- UPRV_UNREACHABLE_EXIT; // Unknown singleton
- }
- ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
- }
- U_CDECL_BEGIN
- static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
- delete (Norm2AllModes *)allModes;
- }
- static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
- #if !NORM2_HARDCODE_NFC_DATA
- delete nfcSingleton;
- nfcSingleton = nullptr;
- nfcInitOnce.reset();
- #endif
- delete nfkcSingleton;
- nfkcSingleton = nullptr;
- nfkcInitOnce.reset();
- delete nfkc_cfSingleton;
- nfkc_cfSingleton = nullptr;
- nfkc_cfInitOnce.reset();
- uhash_close(cache);
- cache=nullptr;
- return true;
- }
- U_CDECL_END
- #if !NORM2_HARDCODE_NFC_DATA
- const Norm2AllModes *
- Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) { return nullptr; }
- umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
- return nfcSingleton;
- }
- #endif
- const Norm2AllModes *
- Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) { return nullptr; }
- umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
- return nfkcSingleton;
- }
- const Norm2AllModes *
- Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) { return nullptr; }
- umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
- return nfkc_cfSingleton;
- }
- #if !NORM2_HARDCODE_NFC_DATA
- const Normalizer2 *
- Normalizer2::getNFCInstance(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
- return allModes!=nullptr ? &allModes->comp : nullptr;
- }
- const Normalizer2 *
- Normalizer2::getNFDInstance(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
- return allModes!=nullptr ? &allModes->decomp : nullptr;
- }
- const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
- return allModes!=nullptr ? &allModes->fcd : nullptr;
- }
- const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
- return allModes!=nullptr ? &allModes->fcc : nullptr;
- }
- const Normalizer2Impl *
- Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
- return allModes!=nullptr ? allModes->impl : nullptr;
- }
- #endif
- const Normalizer2 *
- Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
- return allModes!=nullptr ? &allModes->comp : nullptr;
- }
- const Normalizer2 *
- Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
- return allModes!=nullptr ? &allModes->decomp : nullptr;
- }
- const Normalizer2 *
- Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
- return allModes!=nullptr ? &allModes->comp : nullptr;
- }
- const Normalizer2 *
- Normalizer2::getInstance(const char *packageName,
- const char *name,
- UNormalization2Mode mode,
- UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return nullptr;
- }
- if(name==nullptr || *name==0) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return nullptr;
- }
- const Norm2AllModes *allModes=nullptr;
- if(packageName==nullptr) {
- if(0==uprv_strcmp(name, "nfc")) {
- allModes=Norm2AllModes::getNFCInstance(errorCode);
- } else if(0==uprv_strcmp(name, "nfkc")) {
- allModes=Norm2AllModes::getNFKCInstance(errorCode);
- } else if(0==uprv_strcmp(name, "nfkc_cf")) {
- allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
- }
- }
- if(allModes==nullptr && U_SUCCESS(errorCode)) {
- {
- Mutex lock;
- if(cache!=nullptr) {
- allModes=(Norm2AllModes *)uhash_get(cache, name);
- }
- }
- if(allModes==nullptr) {
- ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
- LocalPointer<Norm2AllModes> localAllModes(
- Norm2AllModes::createInstance(packageName, name, errorCode));
- if(U_SUCCESS(errorCode)) {
- Mutex lock;
- if(cache==nullptr) {
- cache=uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &errorCode);
- if(U_FAILURE(errorCode)) {
- return nullptr;
- }
- uhash_setKeyDeleter(cache, uprv_free);
- uhash_setValueDeleter(cache, deleteNorm2AllModes);
- }
- void *temp=uhash_get(cache, name);
- if(temp==nullptr) {
- int32_t keyLength= static_cast<int32_t>(uprv_strlen(name)+1);
- char *nameCopy=(char *)uprv_malloc(keyLength);
- if(nameCopy==nullptr) {
- errorCode=U_MEMORY_ALLOCATION_ERROR;
- return nullptr;
- }
- uprv_memcpy(nameCopy, name, keyLength);
- allModes=localAllModes.getAlias();
- uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);
- } else {
- // race condition
- allModes=(Norm2AllModes *)temp;
- }
- }
- }
- }
- if(allModes!=nullptr && U_SUCCESS(errorCode)) {
- switch(mode) {
- case UNORM2_COMPOSE:
- return &allModes->comp;
- case UNORM2_DECOMPOSE:
- return &allModes->decomp;
- case UNORM2_FCD:
- return &allModes->fcd;
- case UNORM2_COMPOSE_CONTIGUOUS:
- return &allModes->fcc;
- default:
- break; // do nothing
- }
- }
- return nullptr;
- }
- const Normalizer2 *
- Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) {
- return nullptr;
- }
- switch(mode) {
- case UNORM_NFD:
- return Normalizer2::getNFDInstance(errorCode);
- case UNORM_NFKD:
- return Normalizer2::getNFKDInstance(errorCode);
- case UNORM_NFC:
- return Normalizer2::getNFCInstance(errorCode);
- case UNORM_NFKC:
- return Normalizer2::getNFKCInstance(errorCode);
- case UNORM_FCD:
- return getFCDInstance(errorCode);
- default: // UNORM_NONE
- return getNoopInstance(errorCode);
- }
- }
- const Normalizer2Impl *
- Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
- return allModes!=nullptr ? allModes->impl : nullptr;
- }
- const Normalizer2Impl *
- Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
- const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
- return allModes!=nullptr ? allModes->impl : nullptr;
- }
- U_NAMESPACE_END
- // C API ------------------------------------------------------------------- ***
- U_NAMESPACE_USE
- U_CAPI const UNormalizer2 * U_EXPORT2
- unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
- return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
- }
- U_CAPI const UNormalizer2 * U_EXPORT2
- unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
- return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
- }
- U_CAPI const UNormalizer2 * U_EXPORT2
- unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
- return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
- }
- U_CAPI const UNormalizer2 * U_EXPORT2
- unorm2_getInstance(const char *packageName,
- const char *name,
- UNormalization2Mode mode,
- UErrorCode *pErrorCode) {
- return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
- }
- U_CFUNC UNormalizationCheckResult
- unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
- if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
- return UNORM_YES;
- }
- UErrorCode errorCode=U_ZERO_ERROR;
- const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
- if(U_SUCCESS(errorCode)) {
- return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
- } else {
- return UNORM_MAYBE;
- }
- }
- #endif // !UCONFIG_NO_NORMALIZATION
|