udata.cpp 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. ******************************************************************************
  5. *
  6. * Copyright (C) 1999-2016, International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. ******************************************************************************
  10. * file name: udata.cpp
  11. * encoding: UTF-8
  12. * tab size: 8 (not used)
  13. * indentation:4
  14. *
  15. * created on: 1999oct25
  16. * created by: Markus W. Scherer
  17. */
  18. #include "unicode/utypes.h" /* U_PLATFORM etc. */
  19. #ifdef __GNUC__
  20. /* if gcc
  21. #define ATTRIBUTE_WEAK __attribute__ ((weak))
  22. might have to #include some other header
  23. */
  24. #endif
  25. #include "unicode/putil.h"
  26. #include "unicode/udata.h"
  27. #include "unicode/uversion.h"
  28. #include "charstr.h"
  29. #include "cmemory.h"
  30. #include "cstring.h"
  31. #include "mutex.h"
  32. #include "putilimp.h"
  33. #include "restrace.h"
  34. #include "uassert.h"
  35. #include "ucln_cmn.h"
  36. #include "ucmndata.h"
  37. #include "udatamem.h"
  38. #include "uhash.h"
  39. #include "umapfile.h"
  40. #include "umutex.h"
  41. /***********************************************************************
  42. *
  43. * Notes on the organization of the ICU data implementation
  44. *
  45. * All of the public API is defined in udata.h
  46. *
  47. * The implementation is split into several files...
  48. *
  49. * - udata.cpp (this file) contains higher level code that knows about
  50. * the search paths for locating data, caching opened data, etc.
  51. *
  52. * - umapfile.c contains the low level platform-specific code for actually loading
  53. * (memory mapping, file reading, whatever) data into memory.
  54. *
  55. * - ucmndata.c deals with the tables of contents of ICU data items within
  56. * an ICU common format data file. The implementation includes
  57. * an abstract interface and support for multiple TOC formats.
  58. * All knowledge of any specific TOC format is encapsulated here.
  59. *
  60. * - udatamem.c has code for managing UDataMemory structs. These are little
  61. * descriptor objects for blocks of memory holding ICU data of
  62. * various types.
  63. */
  64. /* configuration ---------------------------------------------------------- */
  65. /* If you are excruciatingly bored turn this on .. */
  66. /* #define UDATA_DEBUG 1 */
  67. #if defined(UDATA_DEBUG)
  68. # include <stdio.h>
  69. #endif
  70. U_NAMESPACE_USE
  71. /*
  72. * Forward declarations
  73. */
  74. static UDataMemory *udata_findCachedData(const char *path, UErrorCode &err);
  75. /***********************************************************************
  76. *
  77. * static (Global) data
  78. *
  79. ************************************************************************/
  80. /*
  81. * Pointers to the common ICU data.
  82. *
  83. * We store multiple pointers to ICU data packages and iterate through them
  84. * when looking for a data item.
  85. *
  86. * It is possible to combine this with dependency inversion:
  87. * One or more data package libraries may export
  88. * functions that each return a pointer to their piece of the ICU data,
  89. * and this file would import them as weak functions, without a
  90. * strong linker dependency from the common library on the data library.
  91. *
  92. * Then we can have applications depend on only that part of ICU's data
  93. * that they really need, reducing the size of binaries that take advantage
  94. * of this.
  95. */
  96. static UDataMemory *gCommonICUDataArray[10] = { nullptr }; // Access protected by icu global mutex.
  97. static u_atomic_int32_t gHaveTriedToLoadCommonData {0}; // See extendICUData().
  98. static UHashtable *gCommonDataCache = nullptr; /* Global hash table of opened ICU data files. */
  99. static icu::UInitOnce gCommonDataCacheInitOnce {};
  100. #if !defined(ICU_DATA_DIR_WINDOWS)
  101. static UDataFileAccess gDataFileAccess = UDATA_DEFAULT_ACCESS; // Access not synchronized.
  102. // Modifying is documented as thread-unsafe.
  103. #else
  104. // If we are using the Windows data directory, then look in one spot only.
  105. static UDataFileAccess gDataFileAccess = UDATA_NO_FILES;
  106. #endif
  107. static UBool U_CALLCONV
  108. udata_cleanup()
  109. {
  110. int32_t i;
  111. if (gCommonDataCache) { /* Delete the cache of user data mappings. */
  112. uhash_close(gCommonDataCache); /* Table owns the contents, and will delete them. */
  113. gCommonDataCache = nullptr; /* Cleanup is not thread safe. */
  114. }
  115. gCommonDataCacheInitOnce.reset();
  116. for (i = 0; i < UPRV_LENGTHOF(gCommonICUDataArray) && gCommonICUDataArray[i] != nullptr; ++i) {
  117. udata_close(gCommonICUDataArray[i]);
  118. gCommonICUDataArray[i] = nullptr;
  119. }
  120. gHaveTriedToLoadCommonData = 0;
  121. return true; /* Everything was cleaned up */
  122. }
  123. static UBool U_CALLCONV
  124. findCommonICUDataByName(const char *inBasename, UErrorCode &err)
  125. {
  126. UBool found = false;
  127. int32_t i;
  128. UDataMemory *pData = udata_findCachedData(inBasename, err);
  129. if (U_FAILURE(err) || pData == nullptr)
  130. return false;
  131. {
  132. Mutex lock;
  133. for (i = 0; i < UPRV_LENGTHOF(gCommonICUDataArray); ++i) {
  134. if ((gCommonICUDataArray[i] != nullptr) && (gCommonICUDataArray[i]->pHeader == pData->pHeader)) {
  135. /* The data pointer is already in the array. */
  136. found = true;
  137. break;
  138. }
  139. }
  140. }
  141. return found;
  142. }
  143. /*
  144. * setCommonICUData. Set a UDataMemory to be the global ICU Data
  145. */
  146. static UBool
  147. setCommonICUData(UDataMemory *pData, /* The new common data. Belongs to caller, we copy it. */
  148. UBool warn, /* If true, set USING_DEFAULT warning if ICUData was */
  149. /* changed by another thread before we got to it. */
  150. UErrorCode *pErr)
  151. {
  152. UDataMemory *newCommonData = UDataMemory_createNewInstance(pErr);
  153. int32_t i;
  154. UBool didUpdate = false;
  155. if (U_FAILURE(*pErr)) {
  156. return false;
  157. }
  158. /* For the assignment, other threads must cleanly see either the old */
  159. /* or the new, not some partially initialized new. The old can not be */
  160. /* deleted - someone may still have a pointer to it lying around in */
  161. /* their locals. */
  162. UDatamemory_assign(newCommonData, pData);
  163. umtx_lock(nullptr);
  164. for (i = 0; i < UPRV_LENGTHOF(gCommonICUDataArray); ++i) {
  165. if (gCommonICUDataArray[i] == nullptr) {
  166. gCommonICUDataArray[i] = newCommonData;
  167. didUpdate = true;
  168. break;
  169. } else if (gCommonICUDataArray[i]->pHeader == pData->pHeader) {
  170. /* The same data pointer is already in the array. */
  171. break;
  172. }
  173. }
  174. umtx_unlock(nullptr);
  175. if (i == UPRV_LENGTHOF(gCommonICUDataArray) && warn) {
  176. *pErr = U_USING_DEFAULT_WARNING;
  177. }
  178. if (didUpdate) {
  179. ucln_common_registerCleanup(UCLN_COMMON_UDATA, udata_cleanup);
  180. } else {
  181. uprv_free(newCommonData);
  182. }
  183. return didUpdate;
  184. }
  185. #if !defined(ICU_DATA_DIR_WINDOWS)
  186. static UBool
  187. setCommonICUDataPointer(const void *pData, UBool /*warn*/, UErrorCode *pErrorCode) {
  188. UDataMemory tData;
  189. UDataMemory_init(&tData);
  190. UDataMemory_setData(&tData, pData);
  191. udata_checkCommonData(&tData, pErrorCode);
  192. return setCommonICUData(&tData, false, pErrorCode);
  193. }
  194. #endif
  195. static const char *
  196. findBasename(const char *path) {
  197. const char *basename=uprv_strrchr(path, U_FILE_SEP_CHAR);
  198. if(basename==nullptr) {
  199. return path;
  200. } else {
  201. return basename+1;
  202. }
  203. }
  #ifdef UDATA_DEBUG
  /*
   * Debug helper: derive a package name from a path.
   * Returns the base name of path, or U_ICUDATA_NAME when path is null or
   * empty, or when its base name is empty.
   */
  static const char *
  packageNameFromPath(const char *path)
  {
      if (path != nullptr && *path != 0) {
          path = findBasename(path);
          if (path != nullptr && *path != 0) {
              return path;
          }
      }
      return U_ICUDATA_NAME;
  }
  #endif
  218. /*----------------------------------------------------------------------*
  219. * *
  220. * Cache for common data *
  221. * Functions for looking up or adding entries to a cache of *
  222. * data that has been previously opened. Avoids a potentially *
  223. * expensive operation of re-opening the data for subsequent *
  224. * uses. *
  225. * *
  226. * Data remains cached for the duration of the process. *
  227. * *
  228. *----------------------------------------------------------------------*/
  229. typedef struct DataCacheElement {
  230. char *name;
  231. UDataMemory *item;
  232. } DataCacheElement;
  233. /*
  234. * Deleter function for DataCacheElements.
  235. * udata cleanup function closes the hash table; hash table in turn calls back to
  236. * here for each entry.
  237. */
  238. static void U_CALLCONV DataCacheElement_deleter(void *pDCEl) {
  239. DataCacheElement *p = (DataCacheElement *)pDCEl;
  240. udata_close(p->item); /* unmaps storage */
  241. uprv_free(p->name); /* delete the hash key string. */
  242. uprv_free(pDCEl); /* delete 'this' */
  243. }
  244. static void U_CALLCONV udata_initHashTable(UErrorCode &err) {
  245. U_ASSERT(gCommonDataCache == nullptr);
  246. gCommonDataCache = uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &err);
  247. if (U_FAILURE(err)) {
  248. return;
  249. }
  250. U_ASSERT(gCommonDataCache != nullptr);
  251. uhash_setValueDeleter(gCommonDataCache, DataCacheElement_deleter);
  252. ucln_common_registerCleanup(UCLN_COMMON_UDATA, udata_cleanup);
  253. }
  254. /* udata_getCacheHashTable()
  255. * Get the hash table used to store the data cache entries.
  256. * Lazy create it if it doesn't yet exist.
  257. */
  258. static UHashtable *udata_getHashTable(UErrorCode &err) {
  259. umtx_initOnce(gCommonDataCacheInitOnce, &udata_initHashTable, err);
  260. return gCommonDataCache;
  261. }
  262. static UDataMemory *udata_findCachedData(const char *path, UErrorCode &err)
  263. {
  264. UHashtable *htable;
  265. UDataMemory *retVal = nullptr;
  266. DataCacheElement *el;
  267. const char *baseName;
  268. htable = udata_getHashTable(err);
  269. if (U_FAILURE(err)) {
  270. return nullptr;
  271. }
  272. baseName = findBasename(path); /* Cache remembers only the base name, not the full path. */
  273. umtx_lock(nullptr);
  274. el = (DataCacheElement *)uhash_get(htable, baseName);
  275. umtx_unlock(nullptr);
  276. if (el != nullptr) {
  277. retVal = el->item;
  278. }
  279. #ifdef UDATA_DEBUG
  280. fprintf(stderr, "Cache: [%s] -> %p\n", baseName, (void*) retVal);
  281. #endif
  282. return retVal;
  283. }
  284. static UDataMemory *udata_cacheDataItem(const char *path, UDataMemory *item, UErrorCode *pErr) {
  285. DataCacheElement *newElement;
  286. const char *baseName;
  287. int32_t nameLen;
  288. UHashtable *htable;
  289. DataCacheElement *oldValue = nullptr;
  290. UErrorCode subErr = U_ZERO_ERROR;
  291. htable = udata_getHashTable(*pErr);
  292. if (U_FAILURE(*pErr)) {
  293. return nullptr;
  294. }
  295. /* Create a new DataCacheElement - the thingy we store in the hash table -
  296. * and copy the supplied path and UDataMemoryItems into it.
  297. */
  298. newElement = (DataCacheElement *)uprv_malloc(sizeof(DataCacheElement));
  299. if (newElement == nullptr) {
  300. *pErr = U_MEMORY_ALLOCATION_ERROR;
  301. return nullptr;
  302. }
  303. newElement->item = UDataMemory_createNewInstance(pErr);
  304. if (U_FAILURE(*pErr)) {
  305. uprv_free(newElement);
  306. return nullptr;
  307. }
  308. UDatamemory_assign(newElement->item, item);
  309. baseName = findBasename(path);
  310. nameLen = (int32_t)uprv_strlen(baseName);
  311. newElement->name = (char *)uprv_malloc(nameLen+1);
  312. if (newElement->name == nullptr) {
  313. *pErr = U_MEMORY_ALLOCATION_ERROR;
  314. uprv_free(newElement->item);
  315. uprv_free(newElement);
  316. return nullptr;
  317. }
  318. uprv_strcpy(newElement->name, baseName);
  319. /* Stick the new DataCacheElement into the hash table.
  320. */
  321. umtx_lock(nullptr);
  322. oldValue = (DataCacheElement *)uhash_get(htable, path);
  323. if (oldValue != nullptr) {
  324. subErr = U_USING_DEFAULT_WARNING;
  325. }
  326. else {
  327. uhash_put(
  328. htable,
  329. newElement->name, /* Key */
  330. newElement, /* Value */
  331. &subErr);
  332. }
  333. umtx_unlock(nullptr);
  334. #ifdef UDATA_DEBUG
  335. fprintf(stderr, "Cache: [%s] <<< %p : %s. vFunc=%p\n", newElement->name,
  336. (void*) newElement->item, u_errorName(subErr), (void*) newElement->item->vFuncs);
  337. #endif
  338. if (subErr == U_USING_DEFAULT_WARNING || U_FAILURE(subErr)) {
  339. *pErr = subErr; /* copy sub err unto fillin ONLY if something happens. */
  340. uprv_free(newElement->name);
  341. uprv_free(newElement->item);
  342. uprv_free(newElement);
  343. return oldValue ? oldValue->item : nullptr;
  344. }
  345. return newElement->item;
  346. }
  347. /*----------------------------------------------------------------------*
  348. * *
  349. * Path management. Could be shared with other tools/etc if need be *
  350. * later on. *
  351. * *
  352. *----------------------------------------------------------------------*/
  353. U_NAMESPACE_BEGIN
  354. class UDataPathIterator
  355. {
  356. public:
  357. UDataPathIterator(const char *path, const char *pkg,
  358. const char *item, const char *suffix, UBool doCheckLastFour,
  359. UErrorCode *pErrorCode);
  360. const char *next(UErrorCode *pErrorCode);
  361. private:
  362. const char *path; /* working path (u_icudata_Dir) */
  363. const char *nextPath; /* path following this one */
  364. const char *basename; /* item's basename (icudt22e_mt.res)*/
  365. StringPiece suffix; /* item suffix (can be null) */
  366. uint32_t basenameLen; /* length of basename */
  367. CharString itemPath; /* path passed in with item name */
  368. CharString pathBuffer; /* output path for this it'ion */
  369. CharString packageStub; /* example: "/icudt28b". Will ignore that leaf in set paths. */
  370. UBool checkLastFour; /* if true then allow paths such as '/foo/myapp.dat'
  371. * to match, checks last 4 chars of suffix with
  372. * last 4 of path, then previous chars. */
  373. };
  374. /**
  375. * @param iter The iterator to be initialized. Its current state does not matter.
  376. * @param inPath The full pathname to be iterated over. If nullptr, defaults to U_ICUDATA_NAME
  377. * @param pkg Package which is being searched for, ex "icudt28l". Will ignore leaf directories such as /icudt28l
  378. * @param item Item to be searched for. Can include full path, such as /a/b/foo.dat
  379. * @param inSuffix Optional item suffix, if not-null (ex. ".dat") then 'path' can contain 'item' explicitly.
  380. * Ex: 'stuff.dat' would be found in '/a/foo:/tmp/stuff.dat:/bar/baz' as item #2.
  381. * '/blarg/stuff.dat' would also be found.
  382. * Note: inSuffix may also be the 'item' being searched for as well, (ex: "ibm-5348_P100-1997.cnv"), in which case
  383. * the 'item' parameter is often the same as pkg. (Though sometimes might have a tree part as well, ex: "icudt62l-curr").
  384. */
  385. UDataPathIterator::UDataPathIterator(const char *inPath, const char *pkg,
  386. const char *item, const char *inSuffix, UBool doCheckLastFour,
  387. UErrorCode *pErrorCode)
  388. {
  389. #ifdef UDATA_DEBUG
  390. fprintf(stderr, "SUFFIX1=%s PATH=%s\n", inSuffix, inPath);
  391. #endif
  392. /** Path **/
  393. if(inPath == nullptr) {
  394. path = u_getDataDirectory();
  395. } else {
  396. path = inPath;
  397. }
  398. /** Package **/
  399. if(pkg != nullptr) {
  400. packageStub.append(U_FILE_SEP_CHAR, *pErrorCode).append(pkg, *pErrorCode);
  401. #ifdef UDATA_DEBUG
  402. fprintf(stderr, "STUB=%s [%d]\n", packageStub.data(), packageStub.length());
  403. #endif
  404. }
  405. /** Item **/
  406. basename = findBasename(item);
  407. basenameLen = (int32_t)uprv_strlen(basename);
  408. /** Item path **/
  409. if(basename == item) {
  410. nextPath = path;
  411. } else {
  412. itemPath.append(item, (int32_t)(basename-item), *pErrorCode);
  413. nextPath = itemPath.data();
  414. }
  415. #ifdef UDATA_DEBUG
  416. fprintf(stderr, "SUFFIX=%s [%p]\n", inSuffix, (void*) inSuffix);
  417. #endif
  418. /** Suffix **/
  419. if(inSuffix != nullptr) {
  420. suffix = inSuffix;
  421. } else {
  422. suffix = "";
  423. }
  424. checkLastFour = doCheckLastFour;
  425. /* pathBuffer will hold the output path strings returned by this iterator */
  426. #ifdef UDATA_DEBUG
  427. fprintf(stderr, "0: init %s -> [path=%s], [base=%s], [suff=%s], [itempath=%s], [nextpath=%s], [checklast4=%s]\n",
  428. item,
  429. path,
  430. basename,
  431. suffix.data(),
  432. itemPath.data(),
  433. nextPath,
  434. checkLastFour?"true":"false");
  435. #endif
  436. }
  437. /**
  438. * Get the next path on the list.
  439. *
  440. * @param iter The Iter to be used
  441. * @param len If set, pointer to the length of the returned path, for convenience.
  442. * @return Pointer to the next path segment, or nullptr if there are no more.
  443. */
  444. const char *UDataPathIterator::next(UErrorCode *pErrorCode)
  445. {
  446. if(U_FAILURE(*pErrorCode)) {
  447. return nullptr;
  448. }
  449. const char *currentPath = nullptr;
  450. int32_t pathLen = 0;
  451. const char *pathBasename;
  452. do
  453. {
  454. if( nextPath == nullptr ) {
  455. break;
  456. }
  457. currentPath = nextPath;
  458. if(nextPath == itemPath.data()) { /* we were processing item's path. */
  459. nextPath = path; /* start with regular path next tm. */
  460. pathLen = (int32_t)uprv_strlen(currentPath);
  461. } else {
  462. /* fix up next for next time */
  463. nextPath = uprv_strchr(currentPath, U_PATH_SEP_CHAR);
  464. if(nextPath == nullptr) {
  465. /* segment: entire path */
  466. pathLen = (int32_t)uprv_strlen(currentPath);
  467. } else {
  468. /* segment: until next segment */
  469. pathLen = (int32_t)(nextPath - currentPath);
  470. /* skip divider */
  471. nextPath ++;
  472. }
  473. }
  474. if(pathLen == 0) {
  475. continue;
  476. }
  477. #ifdef UDATA_DEBUG
  478. fprintf(stderr, "rest of path (IDD) = %s\n", currentPath);
  479. fprintf(stderr, " ");
  480. {
  481. int32_t qqq;
  482. for(qqq=0;qqq<pathLen;qqq++)
  483. {
  484. fprintf(stderr, " ");
  485. }
  486. fprintf(stderr, "^\n");
  487. }
  488. #endif
  489. pathBuffer.clear().append(currentPath, pathLen, *pErrorCode);
  490. /* check for .dat files */
  491. pathBasename = findBasename(pathBuffer.data());
  492. if(checkLastFour &&
  493. (pathLen>=4) &&
  494. uprv_strncmp(pathBuffer.data() +(pathLen-4), suffix.data(), 4)==0 && /* suffix matches */
  495. uprv_strncmp(findBasename(pathBuffer.data()), basename, basenameLen)==0 && /* base matches */
  496. uprv_strlen(pathBasename)==(basenameLen+4)) { /* base+suffix = full len */
  497. #ifdef UDATA_DEBUG
  498. fprintf(stderr, "Have %s file on the path: %s\n", suffix.data(), pathBuffer.data());
  499. #endif
  500. /* do nothing */
  501. }
  502. else
  503. { /* regular dir path */
  504. if(pathBuffer[pathLen-1] != U_FILE_SEP_CHAR) {
  505. if((pathLen>=4) &&
  506. uprv_strncmp(pathBuffer.data()+(pathLen-4), ".dat", 4) == 0)
  507. {
  508. #ifdef UDATA_DEBUG
  509. fprintf(stderr, "skipping non-directory .dat file %s\n", pathBuffer.data());
  510. #endif
  511. continue;
  512. }
  513. /* Check if it is a directory with the same name as our package */
  514. if(!packageStub.isEmpty() &&
  515. (pathLen > packageStub.length()) &&
  516. !uprv_strcmp(pathBuffer.data() + pathLen - packageStub.length(), packageStub.data())) {
  517. #ifdef UDATA_DEBUG
  518. fprintf(stderr, "Found stub %s (will add package %s of len %d)\n", packageStub.data(), basename, basenameLen);
  519. #endif
  520. pathBuffer.truncate(pathLen - packageStub.length());
  521. }
  522. pathBuffer.append(U_FILE_SEP_CHAR, *pErrorCode);
  523. }
  524. /* + basename */
  525. pathBuffer.append(packageStub.data()+1, packageStub.length()-1, *pErrorCode);
  526. if (!suffix.empty()) /* tack on suffix */
  527. {
  528. if (suffix.length() > 4) {
  529. // If the suffix is actually an item ("ibm-5348_P100-1997.cnv") and not an extension (".res")
  530. // then we need to ensure that the path ends with a separator.
  531. pathBuffer.ensureEndsWithFileSeparator(*pErrorCode);
  532. }
  533. pathBuffer.append(suffix, *pErrorCode);
  534. }
  535. }
  536. #ifdef UDATA_DEBUG
  537. fprintf(stderr, " --> %s\n", pathBuffer.data());
  538. #endif
  539. return pathBuffer.data();
  540. } while(path);
  541. /* fell way off the end */
  542. return nullptr;
  543. }
  544. U_NAMESPACE_END
  545. /* ==================================================================================*/
  546. /*----------------------------------------------------------------------*
  547. * *
  548. * Add a static reference to the common data library *
  549. * Unless overridden by an explicit udata_setCommonData, this will be *
  550. * our common data. *
  551. * *
  552. *----------------------------------------------------------------------*/
  553. #if !defined(ICU_DATA_DIR_WINDOWS)
  554. // When using the Windows system data, we expect only a single data file.
  555. extern "C" const DataHeader U_DATA_API U_ICUDATA_ENTRY_POINT;
  556. #endif
  557. /*
  558. * This would be a good place for weak-linkage declarations of
  559. * partial-data-library access functions where each returns a pointer
  560. * to its data package, if it is linked in.
  561. */
  562. /*
  563. extern const void *uprv_getICUData_collation() ATTRIBUTE_WEAK;
  564. extern const void *uprv_getICUData_conversion() ATTRIBUTE_WEAK;
  565. */
  566. /*----------------------------------------------------------------------*
  567. * *
  568. * openCommonData Attempt to open a common format (.dat) file *
  569. * Map it into memory (if it's not there already) *
  570. * and return a UDataMemory object for it. *
  571. * *
  572. * If the requested data is already open and cached *
  573. * just return the cached UDataMem object. *
  574. * *
  575. *----------------------------------------------------------------------*/
  576. static UDataMemory *
  577. openCommonData(const char *path, /* Path from OpenChoice? */
  578. int32_t commonDataIndex, /* ICU Data (index >= 0) if path == nullptr */
  579. UErrorCode *pErrorCode)
  580. {
  581. UDataMemory tData;
  582. const char *pathBuffer;
  583. const char *inBasename;
  584. if (U_FAILURE(*pErrorCode)) {
  585. return nullptr;
  586. }
  587. UDataMemory_init(&tData);
  588. /* ??????? TODO revisit this */
  589. if (commonDataIndex >= 0) {
  590. /* "mini-cache" for common ICU data */
  591. if(commonDataIndex >= UPRV_LENGTHOF(gCommonICUDataArray)) {
  592. return nullptr;
  593. }
  594. {
  595. Mutex lock;
  596. if(gCommonICUDataArray[commonDataIndex] != nullptr) {
  597. return gCommonICUDataArray[commonDataIndex];
  598. }
  599. #if !defined(ICU_DATA_DIR_WINDOWS)
  600. // When using the Windows system data, we expect only a single data file.
  601. int32_t i;
  602. for(i = 0; i < commonDataIndex; ++i) {
  603. if(gCommonICUDataArray[i]->pHeader == &U_ICUDATA_ENTRY_POINT) {
  604. /* The linked-in data is already in the list. */
  605. return nullptr;
  606. }
  607. }
  608. #endif
  609. }
  610. /* Add the linked-in data to the list. */
  611. /*
  612. * This is where we would check and call weakly linked partial-data-library
  613. * access functions.
  614. */
  615. /*
  616. if (uprv_getICUData_collation) {
  617. setCommonICUDataPointer(uprv_getICUData_collation(), false, pErrorCode);
  618. }
  619. if (uprv_getICUData_conversion) {
  620. setCommonICUDataPointer(uprv_getICUData_conversion(), false, pErrorCode);
  621. }
  622. */
  623. #if !defined(ICU_DATA_DIR_WINDOWS)
  624. // When using the Windows system data, we expect only a single data file.
  625. setCommonICUDataPointer(&U_ICUDATA_ENTRY_POINT, false, pErrorCode);
  626. {
  627. Mutex lock;
  628. return gCommonICUDataArray[commonDataIndex];
  629. }
  630. #endif
  631. }
  632. /* request is NOT for ICU Data. */
  633. /* Find the base name portion of the supplied path. */
  634. /* inBasename will be left pointing somewhere within the original path string. */
  635. inBasename = findBasename(path);
  636. #ifdef UDATA_DEBUG
  637. fprintf(stderr, "inBasename = %s\n", inBasename);
  638. #endif
  639. if(*inBasename==0) {
  640. /* no basename. This will happen if the original path was a directory name, */
  641. /* like "a/b/c/". (Fallback to separate files will still work.) */
  642. #ifdef UDATA_DEBUG
  643. fprintf(stderr, "ocd: no basename in %s, bailing.\n", path);
  644. #endif
  645. if (U_SUCCESS(*pErrorCode)) {
  646. *pErrorCode=U_FILE_ACCESS_ERROR;
  647. }
  648. return nullptr;
  649. }
  650. /* Is the requested common data file already open and cached? */
  651. /* Note that the cache is keyed by the base name only. The rest of the path, */
  652. /* if any, is not considered. */
  653. UDataMemory *dataToReturn = udata_findCachedData(inBasename, *pErrorCode);
  654. if (dataToReturn != nullptr || U_FAILURE(*pErrorCode)) {
  655. return dataToReturn;
  656. }
  657. /* Requested item is not in the cache.
  658. * Hunt it down, trying all the path locations
  659. */
  660. UDataPathIterator iter(u_getDataDirectory(), inBasename, path, ".dat", true, pErrorCode);
  661. while ((UDataMemory_isLoaded(&tData)==false) && (pathBuffer = iter.next(pErrorCode)) != nullptr)
  662. {
  663. #ifdef UDATA_DEBUG
  664. fprintf(stderr, "ocd: trying path %s - ", pathBuffer);
  665. #endif
  666. uprv_mapFile(&tData, pathBuffer, pErrorCode);
  667. #ifdef UDATA_DEBUG
  668. fprintf(stderr, "%s\n", UDataMemory_isLoaded(&tData)?"LOADED":"not loaded");
  669. #endif
  670. }
  671. if (U_FAILURE(*pErrorCode)) {
  672. return nullptr;
  673. }
  674. #if defined(OS390_STUBDATA) && defined(OS390BATCH)
  675. if (!UDataMemory_isLoaded(&tData)) {
  676. char ourPathBuffer[1024];
  677. /* One more chance, for extendCommonData() */
  678. uprv_strncpy(ourPathBuffer, path, 1019);
  679. ourPathBuffer[1019]=0;
  680. uprv_strcat(ourPathBuffer, ".dat");
  681. uprv_mapFile(&tData, ourPathBuffer, pErrorCode);
  682. }
  683. #endif
  684. if (U_FAILURE(*pErrorCode)) {
  685. return nullptr;
  686. }
  687. if (!UDataMemory_isLoaded(&tData)) {
  688. /* no common data */
  689. *pErrorCode=U_FILE_ACCESS_ERROR;
  690. return nullptr;
  691. }
  692. /* we have mapped a file, check its header */
  693. udata_checkCommonData(&tData, pErrorCode);
  694. /* Cache the UDataMemory struct for this .dat file,
  695. * so we won't need to hunt it down and map it again next time
  696. * something is needed from it. */
  697. return udata_cacheDataItem(inBasename, &tData, pErrorCode);
  698. }
  699. /*----------------------------------------------------------------------*
  700. * *
  701. * extendICUData If the full set of ICU data was not loaded at *
  702. * program startup, load it now. This function will *
  703. * be called when the lookup of an ICU data item in *
  704. * the common ICU data fails. *
  705. * *
  706. * return true if new data is loaded, false otherwise.*
  707. * *
  708. *----------------------------------------------------------------------*/
  709. static UBool extendICUData(UErrorCode *pErr)
  710. {
  711. UDataMemory *pData;
  712. UDataMemory copyPData;
  713. UBool didUpdate = false;
  714. /*
  715. * There is a chance for a race condition here.
  716. * Normally, ICU data is loaded from a DLL or via mmap() and
  717. * setCommonICUData() will detect if the same address is set twice.
  718. * If ICU is built with data loading via fread() then the address will
  719. * be different each time the common data is loaded and we may add
  720. * multiple copies of the data.
  721. * In this case, use a mutex to prevent the race.
  722. * Use a specific mutex to avoid nested locks of the global mutex.
  723. */
  724. #if MAP_IMPLEMENTATION==MAP_STDIO
  725. static UMutex extendICUDataMutex;
  726. umtx_lock(&extendICUDataMutex);
  727. #endif
  728. if(!umtx_loadAcquire(gHaveTriedToLoadCommonData)) {
  729. /* See if we can explicitly open a .dat file for the ICUData. */
  730. pData = openCommonData(
  731. U_ICUDATA_NAME, /* "icudt20l" , for example. */
  732. -1, /* Pretend we're not opening ICUData */
  733. pErr);
  734. /* How about if there is no pData, eh... */
  735. UDataMemory_init(&copyPData);
  736. if(pData != nullptr) {
  737. UDatamemory_assign(&copyPData, pData);
  738. copyPData.map = 0; /* The mapping for this data is owned by the hash table */
  739. copyPData.mapAddr = 0; /* which will unmap it when ICU is shut down. */
  740. /* CommonICUData is also unmapped when ICU is shut down.*/
  741. /* To avoid unmapping the data twice, zero out the map */
  742. /* fields in the UDataMemory that we're assigning */
  743. /* to CommonICUData. */
  744. didUpdate = /* no longer using this result */
  745. setCommonICUData(&copyPData,/* The new common data. */
  746. false, /* No warnings if write didn't happen */
  747. pErr); /* setCommonICUData honors errors; NOP if error set */
  748. }
  749. umtx_storeRelease(gHaveTriedToLoadCommonData, 1);
  750. }
  751. didUpdate = findCommonICUDataByName(U_ICUDATA_NAME, *pErr); /* Return 'true' when a racing writes out the extended */
  752. /* data after another thread has failed to see it (in openCommonData), so */
  753. /* extended data can be examined. */
  754. /* Also handles a race through here before gHaveTriedToLoadCommonData is set. */
  755. #if MAP_IMPLEMENTATION==MAP_STDIO
  756. umtx_unlock(&extendICUDataMutex);
  757. #endif
  758. return didUpdate; /* Return true if ICUData pointer was updated. */
  759. /* (Could potentially have been done by another thread racing */
  760. /* us through here, but that's fine, we still return true */
  761. /* so that current thread will also examine extended data. */
  762. }
  763. /*----------------------------------------------------------------------*
  764. * *
  765. * udata_setCommonData *
  766. * *
  767. *----------------------------------------------------------------------*/
  768. U_CAPI void U_EXPORT2
  769. udata_setCommonData(const void *data, UErrorCode *pErrorCode) {
  770. UDataMemory dataMemory;
  771. if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
  772. return;
  773. }
  774. if(data==nullptr) {
  775. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  776. return;
  777. }
  778. /* set the data pointer and test for validity */
  779. UDataMemory_init(&dataMemory);
  780. UDataMemory_setData(&dataMemory, data);
  781. udata_checkCommonData(&dataMemory, pErrorCode);
  782. if (U_FAILURE(*pErrorCode)) {return;}
  783. /* we have good data */
  784. /* Set it up as the ICU Common Data. */
  785. setCommonICUData(&dataMemory, true, pErrorCode);
  786. }
  787. /*---------------------------------------------------------------------------
  788. *
  789. * udata_setAppData
  790. *
  791. *---------------------------------------------------------------------------- */
  792. U_CAPI void U_EXPORT2
  793. udata_setAppData(const char *path, const void *data, UErrorCode *err)
  794. {
  795. UDataMemory udm;
  796. if(err==nullptr || U_FAILURE(*err)) {
  797. return;
  798. }
  799. if(data==nullptr) {
  800. *err=U_ILLEGAL_ARGUMENT_ERROR;
  801. return;
  802. }
  803. UDataMemory_init(&udm);
  804. UDataMemory_setData(&udm, data);
  805. udata_checkCommonData(&udm, err);
  806. udata_cacheDataItem(path, &udm, err);
  807. }
  808. /*----------------------------------------------------------------------------*
  809. * *
  810. * checkDataItem Given a freshly located/loaded data item, either *
  811. * an entry in a common file or a separately loaded file, *
  812. * sanity check its header, and see if the data is *
  813. * acceptable to the app. *
  814. * If the data is good, create and return a UDataMemory *
  815. * object that can be returned to the application. *
  816. * Return nullptr on any sort of failure. *
  817. * *
  818. *----------------------------------------------------------------------------*/
  819. static UDataMemory *
  820. checkDataItem
  821. (
  822. const DataHeader *pHeader, /* The data item to be checked. */
  823. UDataMemoryIsAcceptable *isAcceptable, /* App's call-back function */
  824. void *context, /* pass-thru param for above. */
  825. const char *type, /* pass-thru param for above. */
  826. const char *name, /* pass-thru param for above. */
  827. UErrorCode *nonFatalErr, /* Error code if this data was not acceptable */
  828. /* but openChoice should continue with */
  829. /* trying to get data from fallback path. */
  830. UErrorCode *fatalErr /* Bad error, caller should return immediately */
  831. )
  832. {
  833. UDataMemory *rDataMem = nullptr; /* the new UDataMemory, to be returned. */
  834. if (U_FAILURE(*fatalErr)) {
  835. return nullptr;
  836. }
  837. if(pHeader->dataHeader.magic1==0xda &&
  838. pHeader->dataHeader.magic2==0x27 &&
  839. (isAcceptable==nullptr || isAcceptable(context, type, name, &pHeader->info))
  840. ) {
  841. rDataMem=UDataMemory_createNewInstance(fatalErr);
  842. if (U_FAILURE(*fatalErr)) {
  843. return nullptr;
  844. }
  845. rDataMem->pHeader = pHeader;
  846. } else {
  847. /* the data is not acceptable, look further */
  848. /* If we eventually find something good, this errorcode will be */
  849. /* cleared out. */
  850. *nonFatalErr=U_INVALID_FORMAT_ERROR;
  851. }
  852. return rDataMem;
  853. }
/**
 * @return the UDataMemory for the first acceptable individual data file found,
 *         or nullptr if no acceptable file was found or an error occurred
 */
  857. static UDataMemory *doLoadFromIndividualFiles(const char *pkgName,
  858. const char *dataPath, const char *tocEntryPathSuffix,
  859. /* following arguments are the same as doOpenChoice itself */
  860. const char *path, const char *type, const char *name,
  861. UDataMemoryIsAcceptable *isAcceptable, void *context,
  862. UErrorCode *subErrorCode,
  863. UErrorCode *pErrorCode)
  864. {
  865. const char *pathBuffer;
  866. UDataMemory dataMemory;
  867. UDataMemory *pEntryData;
  868. /* look in ind. files: package\nam.typ ========================= */
  869. /* init path iterator for individual files */
  870. UDataPathIterator iter(dataPath, pkgName, path, tocEntryPathSuffix, false, pErrorCode);
  871. while ((pathBuffer = iter.next(pErrorCode)) != nullptr)
  872. {
  873. #ifdef UDATA_DEBUG
  874. fprintf(stderr, "UDATA: trying individual file %s\n", pathBuffer);
  875. #endif
  876. if (uprv_mapFile(&dataMemory, pathBuffer, pErrorCode))
  877. {
  878. pEntryData = checkDataItem(dataMemory.pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode);
  879. if (pEntryData != nullptr) {
  880. /* Data is good.
  881. * Hand off ownership of the backing memory to the user's UDataMemory.
  882. * and return it. */
  883. pEntryData->mapAddr = dataMemory.mapAddr;
  884. pEntryData->map = dataMemory.map;
  885. #ifdef UDATA_DEBUG
  886. fprintf(stderr, "** Mapped file: %s\n", pathBuffer);
  887. #endif
  888. return pEntryData;
  889. }
  890. /* the data is not acceptable, or some error occurred. Either way, unmap the memory */
  891. udata_close(&dataMemory);
  892. /* If we had a nasty error, bail out completely. */
  893. if (U_FAILURE(*pErrorCode)) {
  894. return nullptr;
  895. }
  896. /* Otherwise remember that we found data but didn't like it for some reason */
  897. *subErrorCode=U_INVALID_FORMAT_ERROR;
  898. }
  899. #ifdef UDATA_DEBUG
  900. fprintf(stderr, "%s\n", UDataMemory_isLoaded(&dataMemory)?"LOADED":"not loaded");
  901. #endif
  902. }
  903. return nullptr;
  904. }
/**
 * @return the UDataMemory for the entry found in the common data,
 *         or nullptr if the entry was not found, was not acceptable,
 *         or an error occurred
 */
  908. static UDataMemory *doLoadFromCommonData(UBool isICUData, const char * /*pkgName*/,
  909. const char * /*dataPath*/, const char * /*tocEntryPathSuffix*/, const char *tocEntryName,
  910. /* following arguments are the same as doOpenChoice itself */
  911. const char *path, const char *type, const char *name,
  912. UDataMemoryIsAcceptable *isAcceptable, void *context,
  913. UErrorCode *subErrorCode,
  914. UErrorCode *pErrorCode)
  915. {
  916. UDataMemory *pEntryData;
  917. const DataHeader *pHeader;
  918. UDataMemory *pCommonData;
  919. int32_t commonDataIndex;
  920. UBool checkedExtendedICUData = false;
  921. /* try to get common data. The loop is for platforms such as the 390 that do
  922. * not initially load the full set of ICU data. If the lookup of an ICU data item
  923. * fails, the full (but slower to load) set is loaded, the and the loop repeats,
  924. * trying the lookup again. Once the full set of ICU data is loaded, the loop wont
  925. * repeat because the full set will be checked the first time through.
  926. *
  927. * The loop also handles the fallback to a .dat file if the application linked
  928. * to the stub data library rather than a real library.
  929. */
  930. for (commonDataIndex = isICUData ? 0 : -1;;) {
  931. pCommonData=openCommonData(path, commonDataIndex, subErrorCode); /** search for pkg **/
  932. if(U_SUCCESS(*subErrorCode) && pCommonData!=nullptr) {
  933. int32_t length;
  934. /* look up the data piece in the common data */
  935. pHeader=pCommonData->vFuncs->Lookup(pCommonData, tocEntryName, &length, subErrorCode);
  936. #ifdef UDATA_DEBUG
  937. fprintf(stderr, "%s: pHeader=%p - %s\n", tocEntryName, (void*) pHeader, u_errorName(*subErrorCode));
  938. #endif
  939. if(pHeader!=nullptr) {
  940. pEntryData = checkDataItem(pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode);
  941. #ifdef UDATA_DEBUG
  942. fprintf(stderr, "pEntryData=%p\n", (void*) pEntryData);
  943. #endif
  944. if (U_FAILURE(*pErrorCode)) {
  945. return nullptr;
  946. }
  947. if (pEntryData != nullptr) {
  948. pEntryData->length = length;
  949. return pEntryData;
  950. }
  951. }
  952. }
  953. // If we failed due to being out-of-memory, then stop early and report the error.
  954. if (*subErrorCode == U_MEMORY_ALLOCATION_ERROR) {
  955. *pErrorCode = *subErrorCode;
  956. return nullptr;
  957. }
  958. /* Data wasn't found. If we were looking for an ICUData item and there is
  959. * more data available, load it and try again,
  960. * otherwise break out of this loop. */
  961. if (!isICUData) {
  962. return nullptr;
  963. } else if (pCommonData != nullptr) {
  964. ++commonDataIndex; /* try the next data package */
  965. } else if ((!checkedExtendedICUData) && extendICUData(subErrorCode)) {
  966. checkedExtendedICUData = true;
  967. /* try this data package slot again: it changed from nullptr to non-nullptr */
  968. } else {
  969. return nullptr;
  970. }
  971. }
  972. }
  973. /*
  974. * Identify the Time Zone resources that are subject to special override data loading.
  975. */
  976. static UBool isTimeZoneFile(const char *name, const char *type) {
  977. return ((uprv_strcmp(type, "res") == 0) &&
  978. (uprv_strcmp(name, "zoneinfo64") == 0 ||
  979. uprv_strcmp(name, "timezoneTypes") == 0 ||
  980. uprv_strcmp(name, "windowsZones") == 0 ||
  981. uprv_strcmp(name, "metaZones") == 0));
  982. }
  983. /*
  984. * A note on the ownership of Mapped Memory
  985. *
  986. * For common format files, ownership resides with the UDataMemory object
  987. * that lives in the cache of opened common data. These UDataMemorys are private
  988. * to the udata implementation, and are never seen directly by users.
  989. *
  990. * The UDataMemory objects returned to users will have the address of some desired
 *   data within the mapped region, but they won't have the mapping info itself, and thus
  992. * won't cause anything to be removed from memory when they are closed.
  993. *
  994. * For individual data files, the UDataMemory returned to the user holds the
  995. * information necessary to unmap the data on close. If the user independently
  996. * opens the same data file twice, two completely independent mappings will be made.
  997. * (There is no cache of opened data items from individual files, only a cache of
  998. * opened Common Data files, that is, files containing a collection of data items.)
  999. *
  1000. * For common data passed in from the user via udata_setAppData() or
  1001. * udata_setCommonData(), ownership remains with the user.
  1002. *
  1003. * UDataMemory objects themselves, as opposed to the memory they describe,
  1004. * can be anywhere - heap, stack/local or global.
  1005. * They have a flag to indicate when they're heap allocated and thus
  1006. * must be deleted when closed.
  1007. */
  1008. /*----------------------------------------------------------------------------*
  1009. * *
  1010. * main data loading functions *
  1011. * *
  1012. *----------------------------------------------------------------------------*/
  1013. static UDataMemory *
  1014. doOpenChoice(const char *path, const char *type, const char *name,
  1015. UDataMemoryIsAcceptable *isAcceptable, void *context,
  1016. UErrorCode *pErrorCode)
  1017. {
  1018. UDataMemory *retVal = nullptr;
  1019. const char *dataPath;
  1020. int32_t tocEntrySuffixIndex;
  1021. const char *tocEntryPathSuffix;
  1022. UErrorCode subErrorCode=U_ZERO_ERROR;
  1023. const char *treeChar;
  1024. UBool isICUData = false;
  1025. FileTracer::traceOpen(path, type, name);
  1026. /* Is this path ICU data? */
  1027. if(path == nullptr ||
  1028. !strcmp(path, U_ICUDATA_ALIAS) || /* "ICUDATA" */
  1029. !uprv_strncmp(path, U_ICUDATA_NAME U_TREE_SEPARATOR_STRING, /* "icudt26e-" */
  1030. uprv_strlen(U_ICUDATA_NAME U_TREE_SEPARATOR_STRING)) ||
  1031. !uprv_strncmp(path, U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING, /* "ICUDATA-" */
  1032. uprv_strlen(U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING))) {
  1033. isICUData = true;
  1034. }
  1035. #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) /* Windows: try "foo\bar" and "foo/bar" */
  1036. /* remap from alternate path char to the main one */
  1037. CharString altSepPath;
  1038. if(path) {
  1039. if(uprv_strchr(path,U_FILE_ALT_SEP_CHAR) != nullptr) {
  1040. altSepPath.append(path, *pErrorCode);
  1041. char *p;
  1042. while ((p = uprv_strchr(altSepPath.data(), U_FILE_ALT_SEP_CHAR)) != nullptr) {
  1043. *p = U_FILE_SEP_CHAR;
  1044. }
  1045. #if defined (UDATA_DEBUG)
  1046. fprintf(stderr, "Changed path from [%s] to [%s]\n", path, altSepPath.s);
  1047. #endif
  1048. path = altSepPath.data();
  1049. }
  1050. }
  1051. #endif
  1052. CharString tocEntryName; /* entry name in tree format. ex: 'icudt28b/coll/ar.res' */
  1053. CharString tocEntryPath; /* entry name in path format. ex: 'icudt28b\\coll\\ar.res' */
  1054. CharString pkgName;
  1055. CharString treeName;
  1056. /* ======= Set up strings */
  1057. if(path==nullptr) {
  1058. pkgName.append(U_ICUDATA_NAME, *pErrorCode);
  1059. } else {
  1060. const char *pkg;
  1061. const char *first;
  1062. pkg = uprv_strrchr(path, U_FILE_SEP_CHAR);
  1063. first = uprv_strchr(path, U_FILE_SEP_CHAR);
  1064. if(uprv_pathIsAbsolute(path) || (pkg != first)) { /* more than one slash in the path- not a tree name */
  1065. /* see if this is an /absolute/path/to/package path */
  1066. if(pkg) {
  1067. pkgName.append(pkg+1, *pErrorCode);
  1068. } else {
  1069. pkgName.append(path, *pErrorCode);
  1070. }
  1071. } else {
  1072. treeChar = uprv_strchr(path, U_TREE_SEPARATOR);
  1073. if(treeChar) {
  1074. treeName.append(treeChar+1, *pErrorCode); /* following '-' */
  1075. if(isICUData) {
  1076. pkgName.append(U_ICUDATA_NAME, *pErrorCode);
  1077. } else {
  1078. pkgName.append(path, (int32_t)(treeChar-path), *pErrorCode);
  1079. if (first == nullptr) {
  1080. /*
  1081. This user data has no path, but there is a tree name.
  1082. Look up the correct path from the data cache later.
  1083. */
  1084. path = pkgName.data();
  1085. }
  1086. }
  1087. } else {
  1088. if(isICUData) {
  1089. pkgName.append(U_ICUDATA_NAME, *pErrorCode);
  1090. } else {
  1091. pkgName.append(path, *pErrorCode);
  1092. }
  1093. }
  1094. }
  1095. }
  1096. #ifdef UDATA_DEBUG
  1097. fprintf(stderr, " P=%s T=%s\n", pkgName.data(), treeName.data());
  1098. #endif
  1099. /* setting up the entry name and file name
  1100. * Make up a full name by appending the type to the supplied
  1101. * name, assuming that a type was supplied.
  1102. */
  1103. /* prepend the package */
  1104. tocEntryName.append(pkgName, *pErrorCode);
  1105. tocEntryPath.append(pkgName, *pErrorCode);
  1106. tocEntrySuffixIndex = tocEntryName.length();
  1107. if(!treeName.isEmpty()) {
  1108. tocEntryName.append(U_TREE_ENTRY_SEP_CHAR, *pErrorCode).append(treeName, *pErrorCode);
  1109. tocEntryPath.append(U_FILE_SEP_CHAR, *pErrorCode).append(treeName, *pErrorCode);
  1110. }
  1111. tocEntryName.append(U_TREE_ENTRY_SEP_CHAR, *pErrorCode).append(name, *pErrorCode);
  1112. tocEntryPath.append(U_FILE_SEP_CHAR, *pErrorCode).append(name, *pErrorCode);
  1113. if(type!=nullptr && *type!=0) {
  1114. tocEntryName.append(".", *pErrorCode).append(type, *pErrorCode);
  1115. tocEntryPath.append(".", *pErrorCode).append(type, *pErrorCode);
  1116. }
  1117. // The +1 is for the U_FILE_SEP_CHAR that is always appended above.
  1118. tocEntryPathSuffix = tocEntryPath.data() + tocEntrySuffixIndex + 1; /* suffix starts here */
  1119. #ifdef UDATA_DEBUG
  1120. fprintf(stderr, " tocEntryName = %s\n", tocEntryName.data());
  1121. fprintf(stderr, " tocEntryPath = %s\n", tocEntryName.data());
  1122. #endif
  1123. #if !defined(ICU_DATA_DIR_WINDOWS)
  1124. if(path == nullptr) {
  1125. path = COMMON_DATA_NAME; /* "icudt26e" */
  1126. }
  1127. #else
  1128. // When using the Windows system data, we expects only a single data file.
  1129. path = COMMON_DATA_NAME; /* "icudt26e" */
  1130. #endif
  1131. /************************ Begin loop looking for ind. files ***************/
  1132. #ifdef UDATA_DEBUG
  1133. fprintf(stderr, "IND: inBasename = %s, pkg=%s\n", "(n/a)", packageNameFromPath(path));
  1134. #endif
  1135. /* End of dealing with a null basename */
  1136. dataPath = u_getDataDirectory();
  1137. /**** Time zone individual files override */
  1138. if (isICUData && isTimeZoneFile(name, type)) {
  1139. const char *tzFilesDir = u_getTimeZoneFilesDirectory(pErrorCode);
  1140. if (tzFilesDir[0] != 0) {
  1141. #ifdef UDATA_DEBUG
  1142. fprintf(stderr, "Trying Time Zone Files directory = %s\n", tzFilesDir);
  1143. #endif
  1144. retVal = doLoadFromIndividualFiles(/* pkgName.data() */ "", tzFilesDir, tocEntryPathSuffix,
  1145. /* path */ "", type, name, isAcceptable, context, &subErrorCode, pErrorCode);
  1146. if((retVal != nullptr) || U_FAILURE(*pErrorCode)) {
  1147. return retVal;
  1148. }
  1149. }
  1150. }
  1151. /**** COMMON PACKAGE - only if packages are first. */
  1152. if(gDataFileAccess == UDATA_PACKAGES_FIRST) {
  1153. #ifdef UDATA_DEBUG
  1154. fprintf(stderr, "Trying packages (UDATA_PACKAGES_FIRST)\n");
  1155. #endif
  1156. /* #2 */
  1157. retVal = doLoadFromCommonData(isICUData,
  1158. pkgName.data(), dataPath, tocEntryPathSuffix, tocEntryName.data(),
  1159. path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
  1160. if((retVal != nullptr) || U_FAILURE(*pErrorCode)) {
  1161. return retVal;
  1162. }
  1163. }
  1164. /**** INDIVIDUAL FILES */
  1165. if((gDataFileAccess==UDATA_PACKAGES_FIRST) ||
  1166. (gDataFileAccess==UDATA_FILES_FIRST)) {
  1167. #ifdef UDATA_DEBUG
  1168. fprintf(stderr, "Trying individual files\n");
  1169. #endif
  1170. /* Check to make sure that there is a dataPath to iterate over */
  1171. if ((dataPath && *dataPath) || !isICUData) {
  1172. retVal = doLoadFromIndividualFiles(pkgName.data(), dataPath, tocEntryPathSuffix,
  1173. path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
  1174. if((retVal != nullptr) || U_FAILURE(*pErrorCode)) {
  1175. return retVal;
  1176. }
  1177. }
  1178. }
  1179. /**** COMMON PACKAGE */
  1180. if((gDataFileAccess==UDATA_ONLY_PACKAGES) ||
  1181. (gDataFileAccess==UDATA_FILES_FIRST)) {
  1182. #ifdef UDATA_DEBUG
  1183. fprintf(stderr, "Trying packages (UDATA_ONLY_PACKAGES || UDATA_FILES_FIRST)\n");
  1184. #endif
  1185. retVal = doLoadFromCommonData(isICUData,
  1186. pkgName.data(), dataPath, tocEntryPathSuffix, tocEntryName.data(),
  1187. path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
  1188. if((retVal != nullptr) || U_FAILURE(*pErrorCode)) {
  1189. return retVal;
  1190. }
  1191. }
  1192. /* Load from DLL. If we haven't attempted package load, we also haven't had any chance to
  1193. try a DLL (static or setCommonData/etc) load.
  1194. If we ever have a "UDATA_ONLY_FILES", add it to the or list here. */
  1195. if(gDataFileAccess==UDATA_NO_FILES) {
  1196. #ifdef UDATA_DEBUG
  1197. fprintf(stderr, "Trying common data (UDATA_NO_FILES)\n");
  1198. #endif
  1199. retVal = doLoadFromCommonData(isICUData,
  1200. pkgName.data(), "", tocEntryPathSuffix, tocEntryName.data(),
  1201. path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
  1202. if((retVal != nullptr) || U_FAILURE(*pErrorCode)) {
  1203. return retVal;
  1204. }
  1205. }
  1206. /* data not found */
  1207. if(U_SUCCESS(*pErrorCode)) {
  1208. if(U_SUCCESS(subErrorCode)) {
  1209. /* file not found */
  1210. *pErrorCode=U_FILE_ACCESS_ERROR;
  1211. } else {
  1212. /* entry point not found or rejected */
  1213. *pErrorCode=subErrorCode;
  1214. }
  1215. }
  1216. return retVal;
  1217. }
  1218. /* API ---------------------------------------------------------------------- */
  1219. U_CAPI UDataMemory * U_EXPORT2
  1220. udata_open(const char *path, const char *type, const char *name,
  1221. UErrorCode *pErrorCode) {
  1222. #ifdef UDATA_DEBUG
  1223. fprintf(stderr, "udata_open(): Opening: %s : %s . %s\n", (path?path:"nullptr"), name, type);
  1224. fflush(stderr);
  1225. #endif
  1226. if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
  1227. return nullptr;
  1228. } else if(name==nullptr || *name==0) {
  1229. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  1230. return nullptr;
  1231. } else {
  1232. return doOpenChoice(path, type, name, nullptr, nullptr, pErrorCode);
  1233. }
  1234. }
  1235. U_CAPI UDataMemory * U_EXPORT2
  1236. udata_openChoice(const char *path, const char *type, const char *name,
  1237. UDataMemoryIsAcceptable *isAcceptable, void *context,
  1238. UErrorCode *pErrorCode) {
  1239. #ifdef UDATA_DEBUG
  1240. fprintf(stderr, "udata_openChoice(): Opening: %s : %s . %s\n", (path?path:"nullptr"), name, type);
  1241. #endif
  1242. if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
  1243. return nullptr;
  1244. } else if(name==nullptr || *name==0 || isAcceptable==nullptr) {
  1245. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  1246. return nullptr;
  1247. } else {
  1248. return doOpenChoice(path, type, name, isAcceptable, context, pErrorCode);
  1249. }
  1250. }
  1251. U_CAPI void U_EXPORT2
  1252. udata_getInfo(UDataMemory *pData, UDataInfo *pInfo) {
  1253. if(pInfo!=nullptr) {
  1254. if(pData!=nullptr && pData->pHeader!=nullptr) {
  1255. const UDataInfo *info=&pData->pHeader->info;
  1256. uint16_t dataInfoSize=udata_getInfoSize(info);
  1257. if(pInfo->size>dataInfoSize) {
  1258. pInfo->size=dataInfoSize;
  1259. }
  1260. uprv_memcpy((uint16_t *)pInfo+1, (const uint16_t *)info+1, pInfo->size-2);
  1261. if(info->isBigEndian!=U_IS_BIG_ENDIAN) {
  1262. /* opposite endianness */
  1263. uint16_t x=info->reservedWord;
  1264. pInfo->reservedWord=(uint16_t)((x<<8)|(x>>8));
  1265. }
  1266. } else {
  1267. pInfo->size=0;
  1268. }
  1269. }
  1270. }
  1271. U_CAPI void U_EXPORT2 udata_setFileAccess(UDataFileAccess access, UErrorCode * /*status*/)
  1272. {
  1273. // Note: this function is documented as not thread safe.
  1274. gDataFileAccess = access;
  1275. }