loadednormalizer2impl.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. * Copyright (C) 2014, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. *******************************************************************************
  8. * loadednormalizer2impl.cpp
  9. *
  10. * created on: 2014sep03
  11. * created by: Markus W. Scherer
  12. */
  13. #include "unicode/utypes.h"
  14. #if !UCONFIG_NO_NORMALIZATION
  15. #include "unicode/udata.h"
  16. #include "unicode/localpointer.h"
  17. #include "unicode/normalizer2.h"
  18. #include "unicode/ucptrie.h"
  19. #include "unicode/unistr.h"
  20. #include "unicode/unorm.h"
  21. #include "cstring.h"
  22. #include "mutex.h"
  23. #include "norm2allmodes.h"
  24. #include "normalizer2impl.h"
  25. #include "uassert.h"
  26. #include "ucln_cmn.h"
  27. #include "uhash.h"
  28. U_NAMESPACE_BEGIN
  29. class LoadedNormalizer2Impl : public Normalizer2Impl {
  30. public:
  31. LoadedNormalizer2Impl() : memory(nullptr), ownedTrie(nullptr) {}
  32. virtual ~LoadedNormalizer2Impl();
  33. void load(const char *packageName, const char *name, UErrorCode &errorCode);
  34. private:
  35. static UBool U_CALLCONV
  36. isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
  37. UDataMemory *memory;
  38. UCPTrie *ownedTrie;
  39. };
  40. LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
  41. udata_close(memory);
  42. ucptrie_close(ownedTrie);
  43. }
  44. UBool U_CALLCONV
  45. LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
  46. const char * /* type */, const char * /*name*/,
  47. const UDataInfo *pInfo) {
  48. if(
  49. pInfo->size>=20 &&
  50. pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
  51. pInfo->charsetFamily==U_CHARSET_FAMILY &&
  52. pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */
  53. pInfo->dataFormat[1]==0x72 &&
  54. pInfo->dataFormat[2]==0x6d &&
  55. pInfo->dataFormat[3]==0x32 &&
  56. pInfo->formatVersion[0]==5
  57. ) {
  58. // Normalizer2Impl *me=(Normalizer2Impl *)context;
  59. // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
  60. return true;
  61. } else {
  62. return false;
  63. }
  64. }
  65. void
  66. LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
  67. if(U_FAILURE(errorCode)) {
  68. return;
  69. }
  70. memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
  71. if(U_FAILURE(errorCode)) {
  72. return;
  73. }
  74. const uint8_t* inBytes = static_cast<const uint8_t*>(udata_getMemory(memory));
  75. const int32_t* inIndexes = reinterpret_cast<const int32_t*>(inBytes);
  76. int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
  77. if(indexesLength<=IX_MIN_LCCC_CP) {
  78. errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes.
  79. return;
  80. }
  81. int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
  82. int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
  83. ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16,
  84. inBytes+offset, nextOffset-offset, nullptr,
  85. &errorCode);
  86. if(U_FAILURE(errorCode)) {
  87. return;
  88. }
  89. offset=nextOffset;
  90. nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
  91. const uint16_t* inExtraData = reinterpret_cast<const uint16_t*>(inBytes + offset);
  92. // smallFCD: new in formatVersion 2
  93. offset=nextOffset;
  94. const uint8_t *inSmallFCD=inBytes+offset;
  95. init(inIndexes, ownedTrie, inExtraData, inSmallFCD);
  96. }
  97. // instance cache ---------------------------------------------------------- ***
  98. Norm2AllModes *
  99. Norm2AllModes::createInstance(const char *packageName,
  100. const char *name,
  101. UErrorCode &errorCode) {
  102. if(U_FAILURE(errorCode)) {
  103. return nullptr;
  104. }
  105. LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl;
  106. if(impl==nullptr) {
  107. errorCode=U_MEMORY_ALLOCATION_ERROR;
  108. return nullptr;
  109. }
  110. impl->load(packageName, name, errorCode);
  111. return createInstance(impl, errorCode);
  112. }
  113. U_CDECL_BEGIN
  114. static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
  115. U_CDECL_END
  116. #if !NORM2_HARDCODE_NFC_DATA
  117. static Norm2AllModes *nfcSingleton;
  118. static icu::UInitOnce nfcInitOnce {};
  119. #endif
  120. static Norm2AllModes *nfkcSingleton;
  121. static icu::UInitOnce nfkcInitOnce {};
  122. static Norm2AllModes *nfkc_cfSingleton;
  123. static icu::UInitOnce nfkc_cfInitOnce {};
  124. static Norm2AllModes *nfkc_scfSingleton;
  125. static icu::UInitOnce nfkc_scfInitOnce {};
  126. static UHashtable *cache=nullptr;
  127. // UInitOnce singleton initialization function
  128. static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
  129. #if !NORM2_HARDCODE_NFC_DATA
  130. if (uprv_strcmp(what, "nfc") == 0) {
  131. nfcSingleton = Norm2AllModes::createInstance(nullptr, "nfc", errorCode);
  132. } else
  133. #endif
  134. if (uprv_strcmp(what, "nfkc") == 0) {
  135. nfkcSingleton = Norm2AllModes::createInstance(nullptr, "nfkc", errorCode);
  136. } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
  137. nfkc_cfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_cf", errorCode);
  138. } else if (uprv_strcmp(what, "nfkc_scf") == 0) {
  139. nfkc_scfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_scf", errorCode);
  140. } else {
  141. UPRV_UNREACHABLE_EXIT; // Unknown singleton
  142. }
  143. ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
  144. }
  145. U_CDECL_BEGIN
  146. static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
  147. delete (Norm2AllModes *)allModes;
  148. }
  149. static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
  150. #if !NORM2_HARDCODE_NFC_DATA
  151. delete nfcSingleton;
  152. nfcSingleton = nullptr;
  153. nfcInitOnce.reset();
  154. #endif
  155. delete nfkcSingleton;
  156. nfkcSingleton = nullptr;
  157. nfkcInitOnce.reset();
  158. delete nfkc_cfSingleton;
  159. nfkc_cfSingleton = nullptr;
  160. nfkc_cfInitOnce.reset();
  161. delete nfkc_scfSingleton;
  162. nfkc_scfSingleton = nullptr;
  163. nfkc_scfInitOnce.reset();
  164. uhash_close(cache);
  165. cache=nullptr;
  166. return true;
  167. }
  168. U_CDECL_END
  169. #if !NORM2_HARDCODE_NFC_DATA
  170. const Norm2AllModes *
  171. Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
  172. if(U_FAILURE(errorCode)) { return nullptr; }
  173. umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
  174. return nfcSingleton;
  175. }
  176. #endif
  177. const Norm2AllModes *
  178. Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
  179. if(U_FAILURE(errorCode)) { return nullptr; }
  180. umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
  181. return nfkcSingleton;
  182. }
  183. const Norm2AllModes *
  184. Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
  185. if(U_FAILURE(errorCode)) { return nullptr; }
  186. umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
  187. return nfkc_cfSingleton;
  188. }
  189. const Norm2AllModes *
  190. Norm2AllModes::getNFKC_SCFInstance(UErrorCode &errorCode) {
  191. if(U_FAILURE(errorCode)) { return nullptr; }
  192. umtx_initOnce(nfkc_scfInitOnce, &initSingletons, "nfkc_scf", errorCode);
  193. return nfkc_scfSingleton;
  194. }
  195. #if !NORM2_HARDCODE_NFC_DATA
  196. const Normalizer2 *
  197. Normalizer2::getNFCInstance(UErrorCode &errorCode) {
  198. const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
  199. return allModes!=nullptr ? &allModes->comp : nullptr;
  200. }
  201. const Normalizer2 *
  202. Normalizer2::getNFDInstance(UErrorCode &errorCode) {
  203. const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
  204. return allModes!=nullptr ? &allModes->decomp : nullptr;
  205. }
  206. const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
  207. const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
  208. return allModes!=nullptr ? &allModes->fcd : nullptr;
  209. }
  210. const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
  211. const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
  212. return allModes!=nullptr ? &allModes->fcc : nullptr;
  213. }
  214. const Normalizer2Impl *
  215. Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
  216. const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
  217. return allModes!=nullptr ? allModes->impl : nullptr;
  218. }
  219. #endif
  220. const Normalizer2 *
  221. Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
  222. const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
  223. return allModes!=nullptr ? &allModes->comp : nullptr;
  224. }
  225. const Normalizer2 *
  226. Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
  227. const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
  228. return allModes!=nullptr ? &allModes->decomp : nullptr;
  229. }
  230. const Normalizer2 *
  231. Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
  232. const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
  233. return allModes!=nullptr ? &allModes->comp : nullptr;
  234. }
  235. const Normalizer2 *
  236. Normalizer2::getNFKCSimpleCasefoldInstance(UErrorCode &errorCode) {
  237. const Norm2AllModes *allModes=Norm2AllModes::getNFKC_SCFInstance(errorCode);
  238. return allModes!=nullptr ? &allModes->comp : nullptr;
  239. }
  240. const Normalizer2 *
  241. Normalizer2::getInstance(const char *packageName,
  242. const char *name,
  243. UNormalization2Mode mode,
  244. UErrorCode &errorCode) {
  245. if(U_FAILURE(errorCode)) {
  246. return nullptr;
  247. }
  248. if(name==nullptr || *name==0) {
  249. errorCode=U_ILLEGAL_ARGUMENT_ERROR;
  250. return nullptr;
  251. }
  252. const Norm2AllModes *allModes=nullptr;
  253. if(packageName==nullptr) {
  254. if(0==uprv_strcmp(name, "nfc")) {
  255. allModes=Norm2AllModes::getNFCInstance(errorCode);
  256. } else if(0==uprv_strcmp(name, "nfkc")) {
  257. allModes=Norm2AllModes::getNFKCInstance(errorCode);
  258. } else if(0==uprv_strcmp(name, "nfkc_cf")) {
  259. allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
  260. } else if(0==uprv_strcmp(name, "nfkc_scf")) {
  261. allModes=Norm2AllModes::getNFKC_SCFInstance(errorCode);
  262. }
  263. }
  264. if(allModes==nullptr && U_SUCCESS(errorCode)) {
  265. {
  266. Mutex lock;
  267. if(cache!=nullptr) {
  268. allModes = static_cast<Norm2AllModes*>(uhash_get(cache, name));
  269. }
  270. }
  271. if(allModes==nullptr) {
  272. ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
  273. LocalPointer<Norm2AllModes> localAllModes(
  274. Norm2AllModes::createInstance(packageName, name, errorCode));
  275. if(U_SUCCESS(errorCode)) {
  276. Mutex lock;
  277. if(cache==nullptr) {
  278. cache=uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &errorCode);
  279. if(U_FAILURE(errorCode)) {
  280. return nullptr;
  281. }
  282. uhash_setKeyDeleter(cache, uprv_free);
  283. uhash_setValueDeleter(cache, deleteNorm2AllModes);
  284. }
  285. void *temp=uhash_get(cache, name);
  286. if(temp==nullptr) {
  287. int32_t keyLength= static_cast<int32_t>(uprv_strlen(name)+1);
  288. char* nameCopy = static_cast<char*>(uprv_malloc(keyLength));
  289. if(nameCopy==nullptr) {
  290. errorCode=U_MEMORY_ALLOCATION_ERROR;
  291. return nullptr;
  292. }
  293. uprv_memcpy(nameCopy, name, keyLength);
  294. allModes=localAllModes.getAlias();
  295. uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);
  296. } else {
  297. // race condition
  298. allModes = static_cast<Norm2AllModes*>(temp);
  299. }
  300. }
  301. }
  302. }
  303. if(allModes!=nullptr && U_SUCCESS(errorCode)) {
  304. switch(mode) {
  305. case UNORM2_COMPOSE:
  306. return &allModes->comp;
  307. case UNORM2_DECOMPOSE:
  308. return &allModes->decomp;
  309. case UNORM2_FCD:
  310. return &allModes->fcd;
  311. case UNORM2_COMPOSE_CONTIGUOUS:
  312. return &allModes->fcc;
  313. default:
  314. break; // do nothing
  315. }
  316. }
  317. return nullptr;
  318. }
  319. const Normalizer2 *
  320. Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
  321. if(U_FAILURE(errorCode)) {
  322. return nullptr;
  323. }
  324. switch(mode) {
  325. case UNORM_NFD:
  326. return Normalizer2::getNFDInstance(errorCode);
  327. case UNORM_NFKD:
  328. return Normalizer2::getNFKDInstance(errorCode);
  329. case UNORM_NFC:
  330. return Normalizer2::getNFCInstance(errorCode);
  331. case UNORM_NFKC:
  332. return Normalizer2::getNFKCInstance(errorCode);
  333. case UNORM_FCD:
  334. return getFCDInstance(errorCode);
  335. default: // UNORM_NONE
  336. return getNoopInstance(errorCode);
  337. }
  338. }
  339. const Normalizer2Impl *
  340. Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
  341. const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
  342. return allModes!=nullptr ? allModes->impl : nullptr;
  343. }
  344. const Normalizer2Impl *
  345. Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
  346. const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
  347. return allModes!=nullptr ? allModes->impl : nullptr;
  348. }
  349. U_NAMESPACE_END
  350. // C API ------------------------------------------------------------------- ***
  351. U_NAMESPACE_USE
  352. U_CAPI const UNormalizer2 * U_EXPORT2
  353. unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
  354. return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
  355. }
  356. U_CAPI const UNormalizer2 * U_EXPORT2
  357. unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
  358. return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
  359. }
  360. U_CAPI const UNormalizer2 * U_EXPORT2
  361. unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
  362. return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
  363. }
  364. U_CAPI const UNormalizer2 * U_EXPORT2
  365. unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode) {
  366. return (const UNormalizer2 *)Normalizer2::getNFKCSimpleCasefoldInstance(*pErrorCode);
  367. }
  368. U_CAPI const UNormalizer2 * U_EXPORT2
  369. unorm2_getInstance(const char *packageName,
  370. const char *name,
  371. UNormalization2Mode mode,
  372. UErrorCode *pErrorCode) {
  373. return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
  374. }
  375. U_CFUNC UNormalizationCheckResult
  376. unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
  377. if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
  378. return UNORM_YES;
  379. }
  380. UErrorCode errorCode=U_ZERO_ERROR;
  381. const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
  382. if(U_SUCCESS(errorCode)) {
  383. return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
  384. } else {
  385. return UNORM_MAYBE;
  386. }
  387. }
  388. #endif // !UCONFIG_NO_NORMALIZATION