ucnv_io.cpp 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. ******************************************************************************
  5. *
  6. * Copyright (C) 1999-2015, International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. ******************************************************************************
  10. *
  11. *
  12. * ucnv_io.cpp:
  13. * initializes global variables and defines functions pertaining to converter
  14. * name resolution aspect of the conversion code.
  15. *
  16. * new implementation:
  17. *
  18. * created on: 1999nov22
  19. * created by: Markus W. Scherer
  20. *
  21. * Use the binary cnvalias.icu (created from convrtrs.txt) to work
  22. * with aliases for converter names.
  23. *
  24. * Date Name Description
  25. * 11/22/1999 markus Created
  26. * 06/28/2002 grhoten Major overhaul of the converter alias design.
  27. * Now an alias can map to different converters
  28. * depending on the specified standard.
  29. *******************************************************************************
  30. */
  31. #include "unicode/utypes.h"
  32. #if !UCONFIG_NO_CONVERSION
  33. #include "unicode/ucnv.h"
  34. #include "unicode/udata.h"
  35. #include "umutex.h"
  36. #include "uarrsort.h"
  37. #include "uassert.h"
  38. #include "udataswp.h"
  39. #include "cstring.h"
  40. #include "cmemory.h"
  41. #include "ucnv_io.h"
  42. #include "uenumimp.h"
  43. #include "ucln_cmn.h"
  44. /* Format of cnvalias.icu -----------------------------------------------------
  45. *
  46. * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
  47. * This binary form contains several tables. All indexes are to uint16_t
  48. * units, and not to the bytes (uint8_t units). Addressing everything on
  49. * 16-bit boundaries allows us to store more information with small index
  50. * numbers, which are also 16-bit in size. The majority of the table (except
  51. * the string table) are 16-bit numbers.
  52. *
  53. * First there is the size of the Table of Contents (TOC). The TOC
  54. * entries contain the size of each section. In order to find the offset
  55. * you just need to sum up the previous offsets.
  56. * The TOC length and entries are an array of uint32_t values.
  57. * The first section after the TOC starts immediately after the TOC.
  58. *
  59. * 1) This section contains a list of converters. This list contains indexes
  60. * into the string table for the converter name. The index of this list is
  61. * also used by other sections, which are mentioned later on.
  62. * This list is not sorted.
  63. *
  64. * 2) This section contains a list of tags. This list contains indexes
  65. * into the string table for the tag name. The index of this list is
  66. * also used by other sections, which are mentioned later on.
  67. * This list is in priority order of standards.
  68. *
  69. * 3) This section contains a list of sorted unique aliases. This
  70. * list contains indexes into the string table for the alias name. The
  71. * index of this list is also used by other sections, like the 4th section.
  72. * The index for the 3rd and 4th section is used to get the
  73. * alias -> converter name mapping. Section 3 and 4 form a two column table.
  74. * Some of the most significant bits of each index may contain other
  75. * information (see findConverter for details).
  76. *
  77. * 4) This section contains a list of mapped converter names. Consider this
  78. * as a table that maps the 3rd section to the 1st section. This list contains
  79. * indexes into the 1st section. The index of this list is the same index in
  80. * the 3rd section. There is also some extra information in the high bits of
  81. * each converter index in this table. Currently it's only used to say that
  82. * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
  83. * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
  84. * the predigested form of the 5th section so that an alias lookup can be fast.
  85. *
 * 5) This section contains a 2D array with indexes to the 6th section. This
 *    section is the full form of all alias mappings. The column index is the
 *    index into the converter list (column header). The row index is the index
 *    into the tag list (row header). This 2D array is the top part of a 3D
 *    array. The third dimension is in the 6th section.
 *
 * 6) This is a blob of variable-length arrays. Each array starts with a size,
 *    and is followed by indexes to alias names in the string table. This is
 *    the third dimension of section 5. No other section should be referencing
 *    this section.
  96. *
  97. * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
  98. * presence indicates that a section 9 exists. UConverterAliasOptions specifies
  99. * what type of string normalization is used among other potential things in the
  100. * future.
  101. *
  102. * 8) This is the string table. All strings are indexed on an even address.
  103. * There are two reasons for this. First many chip architectures locate strings
  104. * faster on even address boundaries. Second, since all indexes are 16-bit
  105. * numbers, this string table can be 128KB in size instead of 64KB when we
  106. * only have strings starting on an even address.
  107. *
  108. * 9) When present this is a set of prenormalized strings from section 8. This
  109. * table contains normalized strings with the dashes and spaces stripped out,
  110. * and all strings lowercased. In the future, the options in section 7 may state
  111. * other types of normalization.
  112. *
  113. * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
  114. * has a unique alias among all converters. That same alias can
  115. * be mentioned in other standards on different converters,
  116. * but only one alias per tag can be unique.
  117. *
  118. *
 *                 Converter Names (Usually in TR22 form)
 *            -------------------------------------------.
 *     T     /                                          /|
 *    a     /                                          / |
 *   g     /                                          /  |
 *  s     /                                          /   |
 *       /                                          /    |
 *       ------------------------------------------/     |
 *     A |                                         |     |
 *     l |                                         |     |
 *     i |                                         |    /
 *     a |                                         |   /
 *     s |                                         |  /
 *     e |                                         | /
 *     s |                                         |/
 *       -------------------------------------------
  135. *
  136. *
  137. *
  138. * Here is what it really looks like. It's like swiss cheese.
  139. * There are holes. Some converters aren't recognized by
  140. * a standard, or they are really old converters that the
  141. * standard doesn't recognize anymore.
  142. *
  143. * Converter Names (Usually in TR22 form)
  144. * -------------------------------------------.
  145. * T /##########################################/|
  146. * a / # # /#
  147. * g / # ## ## ### # ### ### ### #/
  148. * s / # ##### #### ## ## #/#
  149. * / ### # # ## # # # ### # # #/##
  150. * ------------------------------------------/# #
  151. * A |### # # ## # # # ### # # #|# #
  152. * l |# # # # # ## # #|# #
  153. * i |# # # # # # #|#
  154. * a |# #|#
  155. * s | #|#
  156. * e
  157. * s
  158. *
  159. */
  160. /**
  161. * Used by the UEnumeration API
  162. */
  163. typedef struct UAliasContext {
  164. uint32_t listOffset;
  165. uint32_t listIdx;
  166. } UAliasContext;
  167. static const char DATA_NAME[] = "cnvalias";
  168. static const char DATA_TYPE[] = "icu";
  169. static UDataMemory *gAliasData=nullptr;
  170. static icu::UInitOnce gAliasDataInitOnce {};
  /*
   * Indexes into the uint32_t table of contents at the start of cnvalias.icu.
   * Entry 0 holds the TOC length; entries 1..9 hold the size of each section.
   */
  enum {
      tocLengthIndex = 0,
      converterListIndex = 1,
      tagListIndex = 2,
      aliasListIndex = 3,
      untaggedConvArrayIndex = 4,
      taggedAliasArrayIndex = 5,
      taggedAliasListsIndex = 6,
      tableOptionsIndex = 7,
      stringTableIndex = 8,
      normalizedStringTableIndex = 9,

      /* length of the swapper's temporary offsets[] */
      offsetsCount,

      /* min. tocLength in the file, does not count the tocLengthIndex! */
      minTocLength = 8
  };
  185. static const UConverterAliasOptions defaultTableOptions = {
  186. UCNV_IO_UNNORMALIZED,
  187. 0 /* containsCnvOptionInfo */
  188. };
  189. static UConverterAlias gMainTable;
  190. #define GET_STRING(idx) (const char *)(gMainTable.stringTable + (idx))
  191. #define GET_NORMALIZED_STRING(idx) (const char *)(gMainTable.normalizedStringTable + (idx))
  192. static UBool U_CALLCONV
  193. isAcceptable(void * /*context*/,
  194. const char * /*type*/, const char * /*name*/,
  195. const UDataInfo *pInfo) {
  196. return
  197. pInfo->size>=20 &&
  198. pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
  199. pInfo->charsetFamily==U_CHARSET_FAMILY &&
  200. pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */
  201. pInfo->dataFormat[1]==0x76 &&
  202. pInfo->dataFormat[2]==0x41 &&
  203. pInfo->dataFormat[3]==0x6c &&
  204. pInfo->formatVersion[0]==3;
  205. }
  206. static UBool U_CALLCONV ucnv_io_cleanup()
  207. {
  208. if (gAliasData) {
  209. udata_close(gAliasData);
  210. gAliasData = nullptr;
  211. }
  212. gAliasDataInitOnce.reset();
  213. uprv_memset(&gMainTable, 0, sizeof(gMainTable));
  214. return true; /* Everything was cleaned up */
  215. }
  216. static void U_CALLCONV initAliasData(UErrorCode &errCode) {
  217. UDataMemory *data;
  218. const uint16_t *table;
  219. const uint32_t *sectionSizes;
  220. uint32_t tableStart;
  221. uint32_t currOffset;
  222. ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);
  223. U_ASSERT(gAliasData == nullptr);
  224. data = udata_openChoice(nullptr, DATA_TYPE, DATA_NAME, isAcceptable, nullptr, &errCode);
  225. if(U_FAILURE(errCode)) {
  226. return;
  227. }
  228. sectionSizes = static_cast<const uint32_t*>(udata_getMemory(data));
  229. table = reinterpret_cast<const uint16_t*>(sectionSizes);
  230. tableStart = sectionSizes[0];
  231. if (tableStart < minTocLength) {
  232. errCode = U_INVALID_FORMAT_ERROR;
  233. udata_close(data);
  234. return;
  235. }
  236. gAliasData = data;
  237. gMainTable.converterListSize = sectionSizes[1];
  238. gMainTable.tagListSize = sectionSizes[2];
  239. gMainTable.aliasListSize = sectionSizes[3];
  240. gMainTable.untaggedConvArraySize = sectionSizes[4];
  241. gMainTable.taggedAliasArraySize = sectionSizes[5];
  242. gMainTable.taggedAliasListsSize = sectionSizes[6];
  243. gMainTable.optionTableSize = sectionSizes[7];
  244. gMainTable.stringTableSize = sectionSizes[8];
  245. if (tableStart > 8) {
  246. gMainTable.normalizedStringTableSize = sectionSizes[9];
  247. }
  248. currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
  249. gMainTable.converterList = table + currOffset;
  250. currOffset += gMainTable.converterListSize;
  251. gMainTable.tagList = table + currOffset;
  252. currOffset += gMainTable.tagListSize;
  253. gMainTable.aliasList = table + currOffset;
  254. currOffset += gMainTable.aliasListSize;
  255. gMainTable.untaggedConvArray = table + currOffset;
  256. currOffset += gMainTable.untaggedConvArraySize;
  257. gMainTable.taggedAliasArray = table + currOffset;
  258. /* aliasLists is a 1's based array, but it has a padding character */
  259. currOffset += gMainTable.taggedAliasArraySize;
  260. gMainTable.taggedAliasLists = table + currOffset;
  261. currOffset += gMainTable.taggedAliasListsSize;
  262. if (gMainTable.optionTableSize > 0
  263. && reinterpret_cast<const UConverterAliasOptions*>(table + currOffset)->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT)
  264. {
  265. /* Faster table */
  266. gMainTable.optionTable = reinterpret_cast<const UConverterAliasOptions*>(table + currOffset);
  267. }
  268. else {
  269. /* Smaller table, or I can't handle this normalization mode!
  270. Use the original slower table lookup. */
  271. gMainTable.optionTable = &defaultTableOptions;
  272. }
  273. currOffset += gMainTable.optionTableSize;
  274. gMainTable.stringTable = table + currOffset;
  275. currOffset += gMainTable.stringTableSize;
  276. gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED)
  277. ? gMainTable.stringTable : (table + currOffset));
  278. }
  279. static UBool
  280. haveAliasData(UErrorCode *pErrorCode) {
  281. umtx_initOnce(gAliasDataInitOnce, &initAliasData, *pErrorCode);
  282. return U_SUCCESS(*pErrorCode);
  283. }
  284. static inline UBool
  285. isAlias(const char *alias, UErrorCode *pErrorCode) {
  286. if(alias==nullptr) {
  287. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  288. return false;
  289. }
  290. return *alias != 0;
  291. }
  292. static uint32_t getTagNumber(const char *tagname) {
  293. if (gMainTable.tagList) {
  294. uint32_t tagNum;
  295. for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) {
  296. if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) {
  297. return tagNum;
  298. }
  299. }
  300. }
  301. return UINT32_MAX;
  302. }
  /*
   * Character types relevant for ucnv_compareNames() and the strip functions.
   * The type tables store one of these values per character, or - for
   * letters - the lowercase letter itself (always >= MINLETTER).
   */
  enum {
      UIGNORE,   /* not a letter or digit: skipped */
      ZERO,      /* the digit zero */
      NONZERO,   /* the digits one through nine */
      MINLETTER  /* any values from here on are lowercase letter mappings */
  };
  310. /* character types for ASCII 00..7F */
  311. static const uint8_t asciiTypes[128] = {
  312. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  313. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  314. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  315. ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0,
  316. 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
  317. 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
  318. 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
  319. 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
  320. };
  /*
   * Character type of an ASCII byte: bytes 00..7F are looked up in asciiTypes,
   * bytes with the high bit set are ignored. The argument is parenthesized at
   * every use so that an expression argument is cast as a whole.
   * Note: the argument is evaluated twice; do not pass expressions with side effects.
   */
  #define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)(c)] : (uint8_t)UIGNORE)
  322. /* character types for EBCDIC 80..FF */
  323. static const uint8_t ebcdicTypes[128] = {
  324. 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
  325. 0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
  326. 0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
  327. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  328. 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
  329. 0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
  330. 0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
  331. ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0
  332. };
  /*
   * Character type of an EBCDIC byte: only bytes 80..FF are covered by
   * ebcdicTypes; bytes without the high bit are ignored.
   */
  #define GET_EBCDIC_TYPE(c) ((int8_t)(c) >= 0 ? (uint8_t)UIGNORE : ebcdicTypes[(c)&0x7f])

  /* GET_CHAR_TYPE uses the table that matches the charset family ICU is built for. */
  #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
  # define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
  #elif U_CHARSET_FAMILY==U_ASCII_FAMILY
  # define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
  #else
  # error U_CHARSET_FAMILY is not valid
  #endif
  341. /* @see ucnv_compareNames */
  342. U_CAPI char * U_CALLCONV
  343. ucnv_io_stripASCIIForCompare(char *dst, const char *name) {
  344. char *dstItr = dst;
  345. uint8_t type, nextType;
  346. char c1;
  347. UBool afterDigit = false;
  348. while ((c1 = *name++) != 0) {
  349. type = GET_ASCII_TYPE(c1);
  350. switch (type) {
  351. case UIGNORE:
  352. afterDigit = false;
  353. continue; /* ignore all but letters and digits */
  354. case ZERO:
  355. if (!afterDigit) {
  356. nextType = GET_ASCII_TYPE(*name);
  357. if (nextType == ZERO || nextType == NONZERO) {
  358. continue; /* ignore leading zero before another digit */
  359. }
  360. }
  361. break;
  362. case NONZERO:
  363. afterDigit = true;
  364. break;
  365. default:
  366. c1 = (char)type; /* lowercased letter */
  367. afterDigit = false;
  368. break;
  369. }
  370. *dstItr++ = c1;
  371. }
  372. *dstItr = 0;
  373. return dst;
  374. }
  375. U_CAPI char * U_CALLCONV
  376. ucnv_io_stripEBCDICForCompare(char *dst, const char *name) {
  377. char *dstItr = dst;
  378. uint8_t type, nextType;
  379. char c1;
  380. UBool afterDigit = false;
  381. while ((c1 = *name++) != 0) {
  382. type = GET_EBCDIC_TYPE(c1);
  383. switch (type) {
  384. case UIGNORE:
  385. afterDigit = false;
  386. continue; /* ignore all but letters and digits */
  387. case ZERO:
  388. if (!afterDigit) {
  389. nextType = GET_EBCDIC_TYPE(*name);
  390. if (nextType == ZERO || nextType == NONZERO) {
  391. continue; /* ignore leading zero before another digit */
  392. }
  393. }
  394. break;
  395. case NONZERO:
  396. afterDigit = true;
  397. break;
  398. default:
  399. c1 = (char)type; /* lowercased letter */
  400. afterDigit = false;
  401. break;
  402. }
  403. *dstItr++ = c1;
  404. }
  405. *dstItr = 0;
  406. return dst;
  407. }
  408. /**
  409. * Do a fuzzy compare of two converter/alias names.
  410. * The comparison is case-insensitive, ignores leading zeroes if they are not
  411. * followed by further digits, and ignores all but letters and digits.
  412. * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
  413. * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
  414. * at http://www.unicode.org/reports/tr22/
  415. *
  416. * This is a symmetrical (commutative) operation; order of arguments
  417. * is insignificant. This is an important property for sorting the
  418. * list (when the list is preprocessed into binary form) and for
  419. * performing binary searches on it at run time.
  420. *
  421. * @param name1 a converter name or alias, zero-terminated
  422. * @param name2 a converter name or alias, zero-terminated
  423. * @return 0 if the names match, or a negative value if the name1
  424. * lexically precedes name2, or a positive value if the name1
  425. * lexically follows name2.
  426. *
  427. * @see ucnv_io_stripForCompare
  428. */
  429. U_CAPI int U_EXPORT2
  430. ucnv_compareNames(const char *name1, const char *name2) {
  431. int rc;
  432. uint8_t type, nextType;
  433. char c1, c2;
  434. UBool afterDigit1 = false, afterDigit2 = false;
  435. for (;;) {
  436. while ((c1 = *name1++) != 0) {
  437. type = GET_CHAR_TYPE(c1);
  438. switch (type) {
  439. case UIGNORE:
  440. afterDigit1 = false;
  441. continue; /* ignore all but letters and digits */
  442. case ZERO:
  443. if (!afterDigit1) {
  444. nextType = GET_CHAR_TYPE(*name1);
  445. if (nextType == ZERO || nextType == NONZERO) {
  446. continue; /* ignore leading zero before another digit */
  447. }
  448. }
  449. break;
  450. case NONZERO:
  451. afterDigit1 = true;
  452. break;
  453. default:
  454. c1 = (char)type; /* lowercased letter */
  455. afterDigit1 = false;
  456. break;
  457. }
  458. break; /* deliver c1 */
  459. }
  460. while ((c2 = *name2++) != 0) {
  461. type = GET_CHAR_TYPE(c2);
  462. switch (type) {
  463. case UIGNORE:
  464. afterDigit2 = false;
  465. continue; /* ignore all but letters and digits */
  466. case ZERO:
  467. if (!afterDigit2) {
  468. nextType = GET_CHAR_TYPE(*name2);
  469. if (nextType == ZERO || nextType == NONZERO) {
  470. continue; /* ignore leading zero before another digit */
  471. }
  472. }
  473. break;
  474. case NONZERO:
  475. afterDigit2 = true;
  476. break;
  477. default:
  478. c2 = (char)type; /* lowercased letter */
  479. afterDigit2 = false;
  480. break;
  481. }
  482. break; /* deliver c2 */
  483. }
  484. /* If we reach the ends of both strings then they match */
  485. if ((c1|c2)==0) {
  486. return 0;
  487. }
  488. /* Case-insensitive comparison */
  489. rc = (int)(unsigned char)c1 - (int)(unsigned char)c2;
  490. if (rc != 0) {
  491. return rc;
  492. }
  493. }
  494. }
  495. /*
  496. * search for an alias
  497. * return the converter number index for gConverterList
  498. */
  499. static inline uint32_t
  500. findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
  501. uint32_t mid, start, limit;
  502. uint32_t lastMid;
  503. int result;
  504. int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED);
  505. char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
  506. if (!isUnnormalized) {
  507. if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) {
  508. *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
  509. return UINT32_MAX;
  510. }
  511. /* Lower case and remove ignoreable characters. */
  512. ucnv_io_stripForCompare(strippedName, alias);
  513. alias = strippedName;
  514. }
  515. /* do a binary search for the alias */
  516. start = 0;
  517. limit = gMainTable.untaggedConvArraySize;
  518. mid = limit;
  519. lastMid = UINT32_MAX;
  520. for (;;) {
  521. mid = (start + limit) / 2;
  522. if (lastMid == mid) { /* Have we moved? */
  523. break; /* We haven't moved, and it wasn't found. */
  524. }
  525. lastMid = mid;
  526. if (isUnnormalized) {
  527. result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid]));
  528. }
  529. else {
  530. result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid]));
  531. }
  532. if (result < 0) {
  533. limit = mid;
  534. } else if (result > 0) {
  535. start = mid;
  536. } else {
  537. /* Since the gencnval tool folds duplicates into one entry,
  538. * this alias in gAliasList is unique, but different standards
  539. * may map an alias to different converters.
  540. */
  541. if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
  542. *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
  543. }
  544. /* State whether the canonical converter name contains an option.
  545. This information is contained in this list in order to maintain backward & forward compatibility. */
  546. if (containsOption) {
  547. UBool containsCnvOptionInfo = static_cast<UBool>(gMainTable.optionTable->containsCnvOptionInfo);
  548. *containsOption = static_cast<UBool>((containsCnvOptionInfo
  549. && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0))
  550. || !containsCnvOptionInfo);
  551. }
  552. return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK;
  553. }
  554. }
  555. return UINT32_MAX;
  556. }
  557. /*
  558. * Is this alias in this list?
  559. * alias and listOffset should be non-nullptr.
  560. */
  561. static inline UBool
  562. isAliasInList(const char *alias, uint32_t listOffset) {
  563. if (listOffset) {
  564. uint32_t currAlias;
  565. uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
  566. /* +1 to skip listCount */
  567. const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
  568. for (currAlias = 0; currAlias < listCount; currAlias++) {
  569. if (currList[currAlias]
  570. && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0)
  571. {
  572. return true;
  573. }
  574. }
  575. }
  576. return false;
  577. }
  578. /*
  579. * Search for an standard name of an alias (what is the default name
  580. * that this standard uses?)
  581. * return the listOffset for gTaggedAliasLists. If it's 0,
  582. * the it couldn't be found, but the parameters are valid.
  583. */
  584. static uint32_t
  585. findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) {
  586. uint32_t idx;
  587. uint32_t listOffset;
  588. uint32_t convNum;
  589. UErrorCode myErr = U_ZERO_ERROR;
  590. uint32_t tagNum = getTagNumber(standard);
  591. /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
  592. convNum = findConverter(alias, nullptr, &myErr);
  593. if (myErr != U_ZERO_ERROR) {
  594. *pErrorCode = myErr;
  595. }
  596. if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
  597. listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
  598. if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) {
  599. return listOffset;
  600. }
  601. if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
  602. /* Uh Oh! They used an ambiguous alias.
  603. We have to search the whole swiss cheese starting
  604. at the highest standard affinity.
  605. This may take a while.
  606. */
  607. for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) {
  608. listOffset = gMainTable.taggedAliasArray[idx];
  609. if (listOffset && isAliasInList(alias, listOffset)) {
  610. uint32_t currTagNum = idx/gMainTable.converterListSize;
  611. uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize);
  612. uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum];
  613. if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) {
  614. return tempListOffset;
  615. }
  616. /* else keep on looking */
  617. /* We could speed this up by starting on the next row
  618. because an alias is unique per row, right now.
  619. This would change if alias versioning appears. */
  620. }
  621. }
  622. /* The standard doesn't know about the alias */
  623. }
  624. /* else no default name */
  625. return 0;
  626. }
  627. /* else converter or tag not found */
  628. return UINT32_MAX;
  629. }
  630. /* Return the canonical name */
  631. static uint32_t
  632. findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) {
  633. uint32_t idx;
  634. uint32_t listOffset;
  635. uint32_t convNum;
  636. UErrorCode myErr = U_ZERO_ERROR;
  637. uint32_t tagNum = getTagNumber(standard);
  638. /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
  639. convNum = findConverter(alias, nullptr, &myErr);
  640. if (myErr != U_ZERO_ERROR) {
  641. *pErrorCode = myErr;
  642. }
  643. if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
  644. listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
  645. if (listOffset && isAliasInList(alias, listOffset)) {
  646. return convNum;
  647. }
  648. if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
  649. /* Uh Oh! They used an ambiguous alias.
  650. We have to search one slice of the swiss cheese.
  651. We search only in the requested tag, not the whole thing.
  652. This may take a while.
  653. */
  654. uint32_t convStart = (tagNum)*gMainTable.converterListSize;
  655. uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize;
  656. for (idx = convStart; idx < convLimit; idx++) {
  657. listOffset = gMainTable.taggedAliasArray[idx];
  658. if (listOffset && isAliasInList(alias, listOffset)) {
  659. return idx-convStart;
  660. }
  661. }
  662. /* The standard doesn't know about the alias */
  663. }
  664. /* else no canonical name */
  665. }
  666. /* else converter or tag not found */
  667. return UINT32_MAX;
  668. }
  669. U_CAPI const char *
  670. ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
  671. const char *aliasTmp = alias;
  672. int32_t i = 0;
  673. for (i = 0; i < 2; i++) {
  674. if (i == 1) {
  675. /*
  676. * After the first unsuccess converter lookup, check to see if
  677. * the name begins with 'x-'. If it does, strip it off and try
  678. * again. This behaviour is similar to how ICU4J does it.
  679. */
  680. if (aliasTmp[0] == 'x' && aliasTmp[1] == '-') {
  681. aliasTmp = aliasTmp+2;
  682. } else {
  683. break;
  684. }
  685. }
  686. if(haveAliasData(pErrorCode) && isAlias(aliasTmp, pErrorCode)) {
  687. uint32_t convNum = findConverter(aliasTmp, containsOption, pErrorCode);
  688. if (convNum < gMainTable.converterListSize) {
  689. return GET_STRING(gMainTable.converterList[convNum]);
  690. }
  691. /* else converter not found */
  692. } else {
  693. break;
  694. }
  695. }
  696. return nullptr;
  697. }
  698. U_CDECL_BEGIN
  699. static int32_t U_CALLCONV
  700. ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
  701. int32_t value = 0;
  702. UAliasContext *myContext = (UAliasContext *)(enumerator->context);
  703. uint32_t listOffset = myContext->listOffset;
  704. if (listOffset) {
  705. value = gMainTable.taggedAliasLists[listOffset];
  706. }
  707. return value;
  708. }
  709. static const char * U_CALLCONV
  710. ucnv_io_nextStandardAliases(UEnumeration *enumerator,
  711. int32_t* resultLength,
  712. UErrorCode * /*pErrorCode*/)
  713. {
  714. UAliasContext *myContext = (UAliasContext *)(enumerator->context);
  715. uint32_t listOffset = myContext->listOffset;
  716. if (listOffset) {
  717. uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
  718. const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
  719. if (myContext->listIdx < listCount) {
  720. const char *myStr = GET_STRING(currList[myContext->listIdx++]);
  721. if (resultLength) {
  722. *resultLength = (int32_t)uprv_strlen(myStr);
  723. }
  724. return myStr;
  725. }
  726. }
  727. /* Either we accessed a zero length list, or we enumerated too far. */
  728. if (resultLength) {
  729. *resultLength = 0;
  730. }
  731. return nullptr;
  732. }
  733. static void U_CALLCONV
  734. ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
  735. ((UAliasContext *)(enumerator->context))->listIdx = 0;
  736. }
  737. static void U_CALLCONV
  738. ucnv_io_closeUEnumeration(UEnumeration *enumerator) {
  739. uprv_free(enumerator->context);
  740. uprv_free(enumerator);
  741. }
  742. U_CDECL_END
/*
 * Enumerate the aliases for the specified converter and standard tag.
 *
 * This is a template: ucnv_openStandardNames() copies it into a freshly
 * allocated UEnumeration and then sets the copy's context member to a
 * UAliasContext. The first two members are therefore left as nullptr here.
 */
static const UEnumeration gEnumAliases = {
    nullptr,
    nullptr,
    ucnv_io_closeUEnumeration,      /* frees the context and the enumeration */
    ucnv_io_countStandardAliases,   /* number of entries in the alias list */
    uenum_unextDefault,
    ucnv_io_nextStandardAliases,    /* returns the next alias string */
    ucnv_io_resetStandardAliases    /* rewinds the enumeration to the start */
};
  753. U_CAPI UEnumeration * U_EXPORT2
  754. ucnv_openStandardNames(const char *convName,
  755. const char *standard,
  756. UErrorCode *pErrorCode)
  757. {
  758. UEnumeration *myEnum = nullptr;
  759. if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) {
  760. uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode);
  761. /* When listOffset == 0, we want to acknowledge that the
  762. converter name and standard are okay, but there
  763. is nothing to enumerate. */
  764. if (listOffset < gMainTable.taggedAliasListsSize) {
  765. UAliasContext *myContext;
  766. myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
  767. if (myEnum == nullptr) {
  768. *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
  769. return nullptr;
  770. }
  771. uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration));
  772. myContext = static_cast<UAliasContext *>(uprv_malloc(sizeof(UAliasContext)));
  773. if (myContext == nullptr) {
  774. *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
  775. uprv_free(myEnum);
  776. return nullptr;
  777. }
  778. myContext->listOffset = listOffset;
  779. myContext->listIdx = 0;
  780. myEnum->context = myContext;
  781. }
  782. /* else converter or tag not found */
  783. }
  784. return myEnum;
  785. }
  786. static uint16_t
  787. ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) {
  788. if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
  789. uint32_t convNum = findConverter(alias, nullptr, pErrorCode);
  790. if (convNum < gMainTable.converterListSize) {
  791. /* tagListNum - 1 is the ALL tag */
  792. int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
  793. if (listOffset) {
  794. return gMainTable.taggedAliasLists[listOffset];
  795. }
  796. /* else this shouldn't happen. internal program error */
  797. }
  798. /* else converter not found */
  799. }
  800. return 0;
  801. }
  802. static uint16_t
  803. ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) {
  804. if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
  805. uint32_t currAlias;
  806. uint32_t convNum = findConverter(alias, nullptr, pErrorCode);
  807. if (convNum < gMainTable.converterListSize) {
  808. /* tagListNum - 1 is the ALL tag */
  809. int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
  810. if (listOffset) {
  811. uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
  812. /* +1 to skip listCount */
  813. const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
  814. for (currAlias = start; currAlias < listCount; currAlias++) {
  815. aliases[currAlias] = GET_STRING(currList[currAlias]);
  816. }
  817. }
  818. /* else this shouldn't happen. internal program error */
  819. }
  820. /* else converter not found */
  821. }
  822. return 0;
  823. }
  824. static const char *
  825. ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
  826. if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
  827. uint32_t convNum = findConverter(alias, nullptr, pErrorCode);
  828. if (convNum < gMainTable.converterListSize) {
  829. /* tagListNum - 1 is the ALL tag */
  830. int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
  831. if (listOffset) {
  832. uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
  833. /* +1 to skip listCount */
  834. const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
  835. if (n < listCount) {
  836. return GET_STRING(currList[n]);
  837. }
  838. *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
  839. }
  840. /* else this shouldn't happen. internal program error */
  841. }
  842. /* else converter not found */
  843. }
  844. return nullptr;
  845. }
  846. static uint16_t
  847. ucnv_io_countStandards(UErrorCode *pErrorCode) {
  848. if (haveAliasData(pErrorCode)) {
  849. /* Don't include the empty list */
  850. return static_cast<uint16_t>(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS);
  851. }
  852. return 0;
  853. }
  854. U_CAPI const char * U_EXPORT2
  855. ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
  856. if (haveAliasData(pErrorCode)) {
  857. if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) {
  858. return GET_STRING(gMainTable.tagList[n]);
  859. }
  860. *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
  861. }
  862. return nullptr;
  863. }
  864. U_CAPI const char * U_EXPORT2
  865. ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
  866. if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
  867. uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode);
  868. if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) {
  869. const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
  870. /* Get the preferred name from this list */
  871. if (currList[0]) {
  872. return GET_STRING(currList[0]);
  873. }
  874. /* else someone screwed up the alias table. */
  875. /* *pErrorCode = U_INVALID_FORMAT_ERROR */
  876. }
  877. }
  878. return nullptr;
  879. }
  880. U_CAPI uint16_t U_EXPORT2
  881. ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
  882. {
  883. return ucnv_io_countAliases(alias, pErrorCode);
  884. }
  885. U_CAPI const char* U_EXPORT2
  886. ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
  887. {
  888. return ucnv_io_getAlias(alias, n, pErrorCode);
  889. }
  890. U_CAPI void U_EXPORT2
  891. ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
  892. {
  893. ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
  894. }
  895. U_CAPI uint16_t U_EXPORT2
  896. ucnv_countStandards()
  897. {
  898. UErrorCode err = U_ZERO_ERROR;
  899. return ucnv_io_countStandards(&err);
  900. }
  901. U_CAPI const char * U_EXPORT2
  902. ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
  903. if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
  904. uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode);
  905. if (convNum < gMainTable.converterListSize) {
  906. return GET_STRING(gMainTable.converterList[convNum]);
  907. }
  908. }
  909. return nullptr;
  910. }
  911. U_CDECL_BEGIN
  912. static int32_t U_CALLCONV
  913. ucnv_io_countAllConverters(UEnumeration * /*enumerator*/, UErrorCode * /*pErrorCode*/) {
  914. return gMainTable.converterListSize;
  915. }
  916. static const char * U_CALLCONV
  917. ucnv_io_nextAllConverters(UEnumeration *enumerator,
  918. int32_t* resultLength,
  919. UErrorCode * /*pErrorCode*/)
  920. {
  921. uint16_t *myContext = (uint16_t *)(enumerator->context);
  922. if (*myContext < gMainTable.converterListSize) {
  923. const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]);
  924. if (resultLength) {
  925. *resultLength = (int32_t)uprv_strlen(myStr);
  926. }
  927. return myStr;
  928. }
  929. /* Either we accessed a zero length list, or we enumerated too far. */
  930. if (resultLength) {
  931. *resultLength = 0;
  932. }
  933. return nullptr;
  934. }
  935. static void U_CALLCONV
  936. ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
  937. *((uint16_t *)(enumerator->context)) = 0;
  938. }
  939. U_CDECL_END
/*
 * Template for the enumeration over all converter names.
 *
 * ucnv_openAllNames() copies it into a freshly allocated UEnumeration and
 * then sets the copy's context member to a uint16_t position counter. The
 * first two members are therefore left as nullptr here.
 */
static const UEnumeration gEnumAllConverters = {
    nullptr,
    nullptr,
    ucnv_io_closeUEnumeration,      /* frees the context and the enumeration */
    ucnv_io_countAllConverters,     /* returns gMainTable.converterListSize */
    uenum_unextDefault,
    ucnv_io_nextAllConverters,      /* returns the next converter name */
    ucnv_io_resetAllConverters      /* rewinds the position counter to 0 */
};
  949. U_CAPI UEnumeration * U_EXPORT2
  950. ucnv_openAllNames(UErrorCode *pErrorCode) {
  951. UEnumeration *myEnum = nullptr;
  952. if (haveAliasData(pErrorCode)) {
  953. uint16_t *myContext;
  954. myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
  955. if (myEnum == nullptr) {
  956. *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
  957. return nullptr;
  958. }
  959. uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration));
  960. myContext = static_cast<uint16_t *>(uprv_malloc(sizeof(uint16_t)));
  961. if (myContext == nullptr) {
  962. *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
  963. uprv_free(myEnum);
  964. return nullptr;
  965. }
  966. *myContext = 0;
  967. myEnum->context = myContext;
  968. }
  969. return myEnum;
  970. }
  971. U_CAPI uint16_t
  972. ucnv_io_countKnownConverters(UErrorCode *pErrorCode) {
  973. if (haveAliasData(pErrorCode)) {
  974. return (uint16_t)gMainTable.converterListSize;
  975. }
  976. return 0;
  977. }
  978. /* alias table swapping ----------------------------------------------------- */
  979. U_CDECL_BEGIN
/*
 * Signature of the name-stripping functions used when sorting alias rows:
 * takes a destination buffer and a name and returns a char * that
 * io_compareRows() passes to uprv_strcmp(). ucnv_swapAliases() selects
 * ucnv_io_stripASCIIForCompare or ucnv_io_stripEBCDICForCompare through
 * this type.
 */
typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name);
  981. U_CDECL_END
/*
 * Row of a temporary array.
 *
 * Each row gets a platform-endian charset string index and a sorting index.
 * After this array has been sorted by strings, the actual arrays are
 * permuted according to the sorting indexes.
 *
 * strIndex:  string index read from the input alias list; io_compareRows()
 *            addresses the string at chars + 2*strIndex.
 * sortIndex: the row's position before sorting.
 */
typedef struct TempRow {
    uint16_t strIndex, sortIndex;
} TempRow;
/*
 * Sorting state that ucnv_swapAliases() hands to io_compareRows() as the
 * comparison context.
 */
typedef struct TempAliasTable {
    const char *chars;                  /* base of the string table the rows are compared by */
    TempRow *rows;                      /* rows being sorted (stack or heap storage) */
    uint16_t *resort;                   /* scratch array used for the in-place permutation */
    StripForCompareFn *stripForCompare; /* name-stripping function applied before comparing */
} TempAliasTable;
/*
 * Number of rows that ucnv_swapAliases() keeps in its stack-allocated
 * rows[] and resort[] arrays; larger alias counts use heap storage.
 */
enum {
    STACK_ROW_CAPACITY=500
};
  1001. static int32_t U_CALLCONV
  1002. io_compareRows(const void *context, const void *left, const void *right) {
  1003. char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH],
  1004. strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH];
  1005. TempAliasTable *tempTable=(TempAliasTable *)context;
  1006. const char *chars=tempTable->chars;
  1007. return static_cast<int32_t>(uprv_strcmp(
  1008. tempTable->stripForCompare(strippedLeft, chars + 2 * static_cast<const TempRow*>(left)->strIndex),
  1009. tempTable->stripForCompare(strippedRight, chars + 2 * static_cast<const TempRow*>(right)->strIndex)));
  1010. }
/**
 * Swap a converter alias table (data format "CvAl", format version 3)
 * using the functions supplied by the UDataSwapper.
 *
 * The data after the header is a table of contents of 32-bit section sizes
 * followed by sections of 16-bit units and the string tables. When the
 * input and output charsets differ, the alias list and the untagged
 * converter array are additionally re-sorted by the output-charset strings.
 *
 * @param ds         Swapper providing the read/swap functions and charsets.
 * @param inData     Input data, starting with the data header.
 * @param length     Number of input bytes. When negative, none of the
 *                   section swapping below runs and only the size is
 *                   computed.
 * @param outData    Output buffer; the in-place case (same alias-list
 *                   address for input and output) is handled separately.
 * @param pErrorCode ICU error code; must not be nullptr.
 * @return headerSize plus the size of the table in bytes, or 0 on error.
 */
U_CAPI int32_t U_EXPORT2
ucnv_swapAliases(const UDataSwapper *ds,
                 const void *inData, int32_t length, void *outData,
                 UErrorCode *pErrorCode) {
    const UDataInfo *pInfo;
    int32_t headerSize;

    const uint16_t *inTable;
    const uint32_t *inSectionSizes;
    uint32_t toc[offsetsCount];
    uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
    uint32_t i, count, tocLength, topOffset;

    /* stack storage for sorting; replaced by heap storage for large tables */
    TempRow rows[STACK_ROW_CAPACITY];
    uint16_t resort[STACK_ROW_CAPACITY];
    TempAliasTable tempTable;

    /* udata_swapDataHeader checks the arguments */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    /* the UDataInfo is read at byte offset 4 of the input */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
        pInfo->dataFormat[1]==0x76 &&
        pInfo->dataFormat[2]==0x41 &&
        pInfo->dataFormat[3]==0x6c &&
        pInfo->formatVersion[0]==3
    )) {
        udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    /* an alias table must contain at least the table of contents array */
    /* (4 bytes for the length entry plus 4 bytes per required section size) */
    if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
        udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
                         length-headerSize);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* the same memory is viewed as 32-bit toc entries and as 16-bit units */
    inSectionSizes=(const uint32_t *)((const char *)inData+headerSize);
    inTable=(const uint16_t *)inSectionSizes;
    uprv_memset(toc, 0, sizeof(toc));
    toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
    /* tocLength must also be a valid index into toc[] and offsets[] */
    if(tocLength<minTocLength || offsetsCount<=tocLength) {
        udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return 0;
    }

    /* read the known part of the table of contents */
    for(i=converterListIndex; i<=tocLength; ++i) {
        toc[i]=ds->readUInt32(inSectionSizes[i]);
    }

    /* compute offsets */
    uprv_memset(offsets, 0, sizeof(offsets));
    offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
    /* each following section starts where the previous one ends */
    for(i=tagListIndex; i<=tocLength; ++i) {
        offsets[i]=offsets[i-1]+toc[i-1];
    }

    /* compute the overall size of the after-header data, in numbers of 16-bit units */
    /* (i is tocLength+1 here, so this is the end of the last section) */
    topOffset=offsets[i-1]+toc[i-1];

    if(length>=0) {
        uint16_t *outTable;
        const uint16_t *p, *p2;
        uint16_t *q, *q2;
        uint16_t oldIndex;

        /* the input must contain all sections described by the table of contents */
        if((length-headerSize)<(2*(int32_t)topOffset)) {
            udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
                             length-headerSize);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        outTable=(uint16_t *)((char *)outData+headerSize);

        /* swap the entire table of contents */
        ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);

        /* swap unnormalized strings & normalized strings */
        /* (both string tables are swapped with one call; the length is in bytes) */
        ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
                             outTable+offsets[stringTableIndex], pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
            return 0;
        }

        if(ds->inCharset==ds->outCharset) {
            /* no need to sort, just swap all 16-bit values together */
            ds->swapArray16(ds,
                            inTable+offsets[converterListIndex],
                            2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
                            outTable+offsets[converterListIndex],
                            pErrorCode);
        } else {
            /* allocate the temporary table for sorting */
            count=toc[aliasListIndex];

            tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */

            if(count<=STACK_ROW_CAPACITY) {
                tempTable.rows=rows;
                tempTable.resort=resort;
            } else {
                /* one allocation: count TempRows followed by count 16-bit resort entries */
                tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2);
                if(tempTable.rows==nullptr) {
                    udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
                                     count);
                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
                    return 0;
                }
                tempTable.resort=(uint16_t *)(tempTable.rows+count);
            }

            if(ds->outCharset==U_ASCII_FAMILY) {
                tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
            } else /* U_EBCDIC_FAMILY */ {
                tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
            }

            /*
             * Sort unique aliases+mapped names.
             *
             * We need to sort the list again by outCharset strings because they
             * sort differently for different charset families.
             * First we set up a temporary table with the string indexes and
             * sorting indexes and sort that.
             * Then we permute and copy/swap the actual values.
             */
            p=inTable+offsets[aliasListIndex];
            q=outTable+offsets[aliasListIndex];

            p2=inTable+offsets[untaggedConvArrayIndex];
            q2=outTable+offsets[untaggedConvArrayIndex];

            /* remember each row's string index and its original position */
            for(i=0; i<count; ++i) {
                tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
                tempTable.rows[i].sortIndex=(uint16_t)i;
            }

            uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
                           io_compareRows, &tempTable,
                           false, pErrorCode);

            if(U_SUCCESS(*pErrorCode)) {
                /* copy/swap/permute items */
                if(p!=q) {
                    /* separate buffers: move each 16-bit unit (2 bytes) directly to its sorted position */
                    for(i=0; i<count; ++i) {
                        oldIndex=tempTable.rows[i].sortIndex;
                        ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
                        ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
                    }
                } else {
                    /*
                     * If we swap in-place, then the permutation must use another
                     * temporary array (tempTable.resort)
                     * before the results are copied to the outBundle.
                     */
                    uint16_t *r=tempTable.resort;

                    /* first the alias list ... */
                    for(i=0; i<count; ++i) {
                        oldIndex=tempTable.rows[i].sortIndex;
                        ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
                    }
                    uprv_memcpy(q, r, 2*(size_t)count);

                    /* ... then the untagged converter array, reusing the same scratch array */
                    for(i=0; i<count; ++i) {
                        oldIndex=tempTable.rows[i].sortIndex;
                        ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
                    }
                    uprv_memcpy(q2, r, 2*(size_t)count);
                }
            }

            /* release heap storage before any error return below */
            if(tempTable.rows!=rows) {
                uprv_free(tempTable.rows);
            }

            /* NOTE(review): this also fires when one of the swapArray16 calls above set the error */
            if(U_FAILURE(*pErrorCode)) {
                udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
                                 count);
                return 0;
            }

            /* swap remaining 16-bit values */
            /* the sections before the alias list */
            ds->swapArray16(ds,
                            inTable+offsets[converterListIndex],
                            2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
                            outTable+offsets[converterListIndex],
                            pErrorCode);
            /* the sections from the tagged alias array up to the string table */
            ds->swapArray16(ds,
                            inTable+offsets[taggedAliasArrayIndex],
                            2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
                            outTable+offsets[taggedAliasArrayIndex],
                            pErrorCode);
        }
    }

    /* topOffset counts 16-bit units, so double it for the size in bytes */
    return headerSize+2*(int32_t)topOffset;
}
  1194. #endif
  1195. /*
  1196. * Hey, Emacs, please set the following:
  1197. *
  1198. * Local Variables:
  1199. * indent-tabs-mode: nil
  1200. * End:
  1201. *
  1202. */