loadednormalizer2impl.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. * Copyright (C) 2014, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. *******************************************************************************
  8. * loadednormalizer2impl.cpp
  9. *
  10. * created on: 2014sep03
  11. * created by: Markus W. Scherer
  12. */
  13. #include "unicode/utypes.h"
  14. #if !UCONFIG_NO_NORMALIZATION
  15. #include "unicode/udata.h"
  16. #include "unicode/localpointer.h"
  17. #include "unicode/normalizer2.h"
  18. #include "unicode/ucptrie.h"
  19. #include "unicode/unistr.h"
  20. #include "unicode/unorm.h"
  21. #include "cstring.h"
  22. #include "mutex.h"
  23. #include "norm2allmodes.h"
  24. #include "normalizer2impl.h"
  25. #include "uassert.h"
  26. #include "ucln_cmn.h"
  27. #include "uhash.h"
  28. U_NAMESPACE_BEGIN
  29. class LoadedNormalizer2Impl : public Normalizer2Impl {
  30. public:
  31. LoadedNormalizer2Impl() : memory(nullptr), ownedTrie(nullptr) {}
  32. virtual ~LoadedNormalizer2Impl();
  33. void load(const char *packageName, const char *name, UErrorCode &errorCode);
  34. private:
  35. static UBool U_CALLCONV
  36. isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
  37. UDataMemory *memory;
  38. UCPTrie *ownedTrie;
  39. };
  40. LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
  41. udata_close(memory);
  42. ucptrie_close(ownedTrie);
  43. }
  44. UBool U_CALLCONV
  45. LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
  46. const char * /* type */, const char * /*name*/,
  47. const UDataInfo *pInfo) {
  48. if(
  49. pInfo->size>=20 &&
  50. pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
  51. pInfo->charsetFamily==U_CHARSET_FAMILY &&
  52. pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */
  53. pInfo->dataFormat[1]==0x72 &&
  54. pInfo->dataFormat[2]==0x6d &&
  55. pInfo->dataFormat[3]==0x32 &&
  56. pInfo->formatVersion[0]==4
  57. ) {
  58. // Normalizer2Impl *me=(Normalizer2Impl *)context;
  59. // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
  60. return true;
  61. } else {
  62. return false;
  63. }
  64. }
  65. void
  66. LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
  67. if(U_FAILURE(errorCode)) {
  68. return;
  69. }
  70. memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
  71. if(U_FAILURE(errorCode)) {
  72. return;
  73. }
  74. const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
  75. const int32_t *inIndexes=(const int32_t *)inBytes;
  76. int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
  77. if(indexesLength<=IX_MIN_LCCC_CP) {
  78. errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes.
  79. return;
  80. }
  81. int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
  82. int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
  83. ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16,
  84. inBytes+offset, nextOffset-offset, nullptr,
  85. &errorCode);
  86. if(U_FAILURE(errorCode)) {
  87. return;
  88. }
  89. offset=nextOffset;
  90. nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
  91. const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset);
  92. // smallFCD: new in formatVersion 2
  93. offset=nextOffset;
  94. const uint8_t *inSmallFCD=inBytes+offset;
  95. init(inIndexes, ownedTrie, inExtraData, inSmallFCD);
  96. }
  97. // instance cache ---------------------------------------------------------- ***
  98. Norm2AllModes *
  99. Norm2AllModes::createInstance(const char *packageName,
  100. const char *name,
  101. UErrorCode &errorCode) {
  102. if(U_FAILURE(errorCode)) {
  103. return nullptr;
  104. }
  105. LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl;
  106. if(impl==nullptr) {
  107. errorCode=U_MEMORY_ALLOCATION_ERROR;
  108. return nullptr;
  109. }
  110. impl->load(packageName, name, errorCode);
  111. return createInstance(impl, errorCode);
  112. }
  113. U_CDECL_BEGIN
  114. static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
  115. U_CDECL_END
  116. #if !NORM2_HARDCODE_NFC_DATA
  117. static Norm2AllModes *nfcSingleton;
  118. static icu::UInitOnce nfcInitOnce {};
  119. #endif
  120. static Norm2AllModes *nfkcSingleton;
  121. static icu::UInitOnce nfkcInitOnce {};
  122. static Norm2AllModes *nfkc_cfSingleton;
  123. static icu::UInitOnce nfkc_cfInitOnce {};
  124. static UHashtable *cache=nullptr;
  125. // UInitOnce singleton initialization function
  126. static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
  127. #if !NORM2_HARDCODE_NFC_DATA
  128. if (uprv_strcmp(what, "nfc") == 0) {
  129. nfcSingleton = Norm2AllModes::createInstance(nullptr, "nfc", errorCode);
  130. } else
  131. #endif
  132. if (uprv_strcmp(what, "nfkc") == 0) {
  133. nfkcSingleton = Norm2AllModes::createInstance(nullptr, "nfkc", errorCode);
  134. } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
  135. nfkc_cfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_cf", errorCode);
  136. } else {
  137. UPRV_UNREACHABLE_EXIT; // Unknown singleton
  138. }
  139. ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
  140. }
  141. U_CDECL_BEGIN
  142. static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
  143. delete (Norm2AllModes *)allModes;
  144. }
  145. static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
  146. #if !NORM2_HARDCODE_NFC_DATA
  147. delete nfcSingleton;
  148. nfcSingleton = nullptr;
  149. nfcInitOnce.reset();
  150. #endif
  151. delete nfkcSingleton;
  152. nfkcSingleton = nullptr;
  153. nfkcInitOnce.reset();
  154. delete nfkc_cfSingleton;
  155. nfkc_cfSingleton = nullptr;
  156. nfkc_cfInitOnce.reset();
  157. uhash_close(cache);
  158. cache=nullptr;
  159. return true;
  160. }
  161. U_CDECL_END
  162. #if !NORM2_HARDCODE_NFC_DATA
  163. const Norm2AllModes *
  164. Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
  165. if(U_FAILURE(errorCode)) { return nullptr; }
  166. umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
  167. return nfcSingleton;
  168. }
  169. #endif
  170. const Norm2AllModes *
  171. Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
  172. if(U_FAILURE(errorCode)) { return nullptr; }
  173. umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
  174. return nfkcSingleton;
  175. }
  176. const Norm2AllModes *
  177. Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
  178. if(U_FAILURE(errorCode)) { return nullptr; }
  179. umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
  180. return nfkc_cfSingleton;
  181. }
  182. #if !NORM2_HARDCODE_NFC_DATA
  183. const Normalizer2 *
  184. Normalizer2::getNFCInstance(UErrorCode &errorCode) {
  185. const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
  186. return allModes!=nullptr ? &allModes->comp : nullptr;
  187. }
  188. const Normalizer2 *
  189. Normalizer2::getNFDInstance(UErrorCode &errorCode) {
  190. const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
  191. return allModes!=nullptr ? &allModes->decomp : nullptr;
  192. }
  193. const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
  194. const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
  195. return allModes!=nullptr ? &allModes->fcd : nullptr;
  196. }
  197. const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
  198. const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
  199. return allModes!=nullptr ? &allModes->fcc : nullptr;
  200. }
  201. const Normalizer2Impl *
  202. Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
  203. const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
  204. return allModes!=nullptr ? allModes->impl : nullptr;
  205. }
  206. #endif
  207. const Normalizer2 *
  208. Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
  209. const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
  210. return allModes!=nullptr ? &allModes->comp : nullptr;
  211. }
  212. const Normalizer2 *
  213. Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
  214. const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
  215. return allModes!=nullptr ? &allModes->decomp : nullptr;
  216. }
  217. const Normalizer2 *
  218. Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
  219. const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
  220. return allModes!=nullptr ? &allModes->comp : nullptr;
  221. }
  222. const Normalizer2 *
  223. Normalizer2::getInstance(const char *packageName,
  224. const char *name,
  225. UNormalization2Mode mode,
  226. UErrorCode &errorCode) {
  227. if(U_FAILURE(errorCode)) {
  228. return nullptr;
  229. }
  230. if(name==nullptr || *name==0) {
  231. errorCode=U_ILLEGAL_ARGUMENT_ERROR;
  232. return nullptr;
  233. }
  234. const Norm2AllModes *allModes=nullptr;
  235. if(packageName==nullptr) {
  236. if(0==uprv_strcmp(name, "nfc")) {
  237. allModes=Norm2AllModes::getNFCInstance(errorCode);
  238. } else if(0==uprv_strcmp(name, "nfkc")) {
  239. allModes=Norm2AllModes::getNFKCInstance(errorCode);
  240. } else if(0==uprv_strcmp(name, "nfkc_cf")) {
  241. allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
  242. }
  243. }
  244. if(allModes==nullptr && U_SUCCESS(errorCode)) {
  245. {
  246. Mutex lock;
  247. if(cache!=nullptr) {
  248. allModes=(Norm2AllModes *)uhash_get(cache, name);
  249. }
  250. }
  251. if(allModes==nullptr) {
  252. ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
  253. LocalPointer<Norm2AllModes> localAllModes(
  254. Norm2AllModes::createInstance(packageName, name, errorCode));
  255. if(U_SUCCESS(errorCode)) {
  256. Mutex lock;
  257. if(cache==nullptr) {
  258. cache=uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &errorCode);
  259. if(U_FAILURE(errorCode)) {
  260. return nullptr;
  261. }
  262. uhash_setKeyDeleter(cache, uprv_free);
  263. uhash_setValueDeleter(cache, deleteNorm2AllModes);
  264. }
  265. void *temp=uhash_get(cache, name);
  266. if(temp==nullptr) {
  267. int32_t keyLength= static_cast<int32_t>(uprv_strlen(name)+1);
  268. char *nameCopy=(char *)uprv_malloc(keyLength);
  269. if(nameCopy==nullptr) {
  270. errorCode=U_MEMORY_ALLOCATION_ERROR;
  271. return nullptr;
  272. }
  273. uprv_memcpy(nameCopy, name, keyLength);
  274. allModes=localAllModes.getAlias();
  275. uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);
  276. } else {
  277. // race condition
  278. allModes=(Norm2AllModes *)temp;
  279. }
  280. }
  281. }
  282. }
  283. if(allModes!=nullptr && U_SUCCESS(errorCode)) {
  284. switch(mode) {
  285. case UNORM2_COMPOSE:
  286. return &allModes->comp;
  287. case UNORM2_DECOMPOSE:
  288. return &allModes->decomp;
  289. case UNORM2_FCD:
  290. return &allModes->fcd;
  291. case UNORM2_COMPOSE_CONTIGUOUS:
  292. return &allModes->fcc;
  293. default:
  294. break; // do nothing
  295. }
  296. }
  297. return nullptr;
  298. }
  299. const Normalizer2 *
  300. Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
  301. if(U_FAILURE(errorCode)) {
  302. return nullptr;
  303. }
  304. switch(mode) {
  305. case UNORM_NFD:
  306. return Normalizer2::getNFDInstance(errorCode);
  307. case UNORM_NFKD:
  308. return Normalizer2::getNFKDInstance(errorCode);
  309. case UNORM_NFC:
  310. return Normalizer2::getNFCInstance(errorCode);
  311. case UNORM_NFKC:
  312. return Normalizer2::getNFKCInstance(errorCode);
  313. case UNORM_FCD:
  314. return getFCDInstance(errorCode);
  315. default: // UNORM_NONE
  316. return getNoopInstance(errorCode);
  317. }
  318. }
  319. const Normalizer2Impl *
  320. Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
  321. const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
  322. return allModes!=nullptr ? allModes->impl : nullptr;
  323. }
  324. const Normalizer2Impl *
  325. Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
  326. const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
  327. return allModes!=nullptr ? allModes->impl : nullptr;
  328. }
  329. U_NAMESPACE_END
  330. // C API ------------------------------------------------------------------- ***
  331. U_NAMESPACE_USE
  332. U_CAPI const UNormalizer2 * U_EXPORT2
  333. unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
  334. return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
  335. }
  336. U_CAPI const UNormalizer2 * U_EXPORT2
  337. unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
  338. return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
  339. }
  340. U_CAPI const UNormalizer2 * U_EXPORT2
  341. unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
  342. return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
  343. }
  344. U_CAPI const UNormalizer2 * U_EXPORT2
  345. unorm2_getInstance(const char *packageName,
  346. const char *name,
  347. UNormalization2Mode mode,
  348. UErrorCode *pErrorCode) {
  349. return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
  350. }
  351. U_CFUNC UNormalizationCheckResult
  352. unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
  353. if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
  354. return UNORM_YES;
  355. }
  356. UErrorCode errorCode=U_ZERO_ERROR;
  357. const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
  358. if(U_SUCCESS(errorCode)) {
  359. return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
  360. } else {
  361. return UNORM_MAYBE;
  362. }
  363. }
  364. #endif // !UCONFIG_NO_NORMALIZATION