ucptrie.cpp 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601
  1. // © 2017 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. // ucptrie.cpp (modified from utrie2.cpp)
  4. // created: 2017dec29 Markus W. Scherer
  5. // #define UCPTRIE_DEBUG
  6. #ifdef UCPTRIE_DEBUG
  7. # include <stdio.h>
  8. #endif
  9. #include "unicode/utypes.h"
  10. #include "unicode/ucptrie.h"
  11. #include "unicode/utf.h"
  12. #include "unicode/utf8.h"
  13. #include "unicode/utf16.h"
  14. #include "cmemory.h"
  15. #include "uassert.h"
  16. #include "ucptrie_impl.h"
  17. U_CAPI UCPTrie * U_EXPORT2
  18. ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth,
  19. const void *data, int32_t length, int32_t *pActualLength,
  20. UErrorCode *pErrorCode) {
  21. if (U_FAILURE(*pErrorCode)) {
  22. return nullptr;
  23. }
  24. if (length <= 0 || (U_POINTER_MASK_LSB(data, 3) != 0) ||
  25. type < UCPTRIE_TYPE_ANY || UCPTRIE_TYPE_SMALL < type ||
  26. valueWidth < UCPTRIE_VALUE_BITS_ANY || UCPTRIE_VALUE_BITS_8 < valueWidth) {
  27. *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
  28. return nullptr;
  29. }
  30. // Enough data for a trie header?
  31. if (length < (int32_t)sizeof(UCPTrieHeader)) {
  32. *pErrorCode = U_INVALID_FORMAT_ERROR;
  33. return nullptr;
  34. }
  35. // Check the signature.
  36. const UCPTrieHeader *header = (const UCPTrieHeader *)data;
  37. if (header->signature != UCPTRIE_SIG) {
  38. *pErrorCode = U_INVALID_FORMAT_ERROR;
  39. return nullptr;
  40. }
  41. int32_t options = header->options;
  42. int32_t typeInt = (options >> 6) & 3;
  43. int32_t valueWidthInt = options & UCPTRIE_OPTIONS_VALUE_BITS_MASK;
  44. if (typeInt > UCPTRIE_TYPE_SMALL || valueWidthInt > UCPTRIE_VALUE_BITS_8 ||
  45. (options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0) {
  46. *pErrorCode = U_INVALID_FORMAT_ERROR;
  47. return nullptr;
  48. }
  49. UCPTrieType actualType = (UCPTrieType)typeInt;
  50. UCPTrieValueWidth actualValueWidth = (UCPTrieValueWidth)valueWidthInt;
  51. if (type < 0) {
  52. type = actualType;
  53. }
  54. if (valueWidth < 0) {
  55. valueWidth = actualValueWidth;
  56. }
  57. if (type != actualType || valueWidth != actualValueWidth) {
  58. *pErrorCode = U_INVALID_FORMAT_ERROR;
  59. return nullptr;
  60. }
  61. // Get the length values and offsets.
  62. UCPTrie tempTrie;
  63. uprv_memset(&tempTrie, 0, sizeof(tempTrie));
  64. tempTrie.indexLength = header->indexLength;
  65. tempTrie.dataLength =
  66. ((options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | header->dataLength;
  67. tempTrie.index3NullOffset = header->index3NullOffset;
  68. tempTrie.dataNullOffset =
  69. ((options & UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK) << 8) | header->dataNullOffset;
  70. tempTrie.highStart = header->shiftedHighStart << UCPTRIE_SHIFT_2;
  71. tempTrie.shifted12HighStart = (tempTrie.highStart + 0xfff) >> 12;
  72. tempTrie.type = type;
  73. tempTrie.valueWidth = valueWidth;
  74. // Calculate the actual length.
  75. int32_t actualLength = (int32_t)sizeof(UCPTrieHeader) + tempTrie.indexLength * 2;
  76. if (valueWidth == UCPTRIE_VALUE_BITS_16) {
  77. actualLength += tempTrie.dataLength * 2;
  78. } else if (valueWidth == UCPTRIE_VALUE_BITS_32) {
  79. actualLength += tempTrie.dataLength * 4;
  80. } else {
  81. actualLength += tempTrie.dataLength;
  82. }
  83. if (length < actualLength) {
  84. *pErrorCode = U_INVALID_FORMAT_ERROR; // Not enough bytes.
  85. return nullptr;
  86. }
  87. // Allocate the trie.
  88. UCPTrie *trie = (UCPTrie *)uprv_malloc(sizeof(UCPTrie));
  89. if (trie == nullptr) {
  90. *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
  91. return nullptr;
  92. }
  93. uprv_memcpy(trie, &tempTrie, sizeof(tempTrie));
  94. #ifdef UCPTRIE_DEBUG
  95. trie->name = "fromSerialized";
  96. #endif
  97. // Set the pointers to its index and data arrays.
  98. const uint16_t *p16 = (const uint16_t *)(header + 1);
  99. trie->index = p16;
  100. p16 += trie->indexLength;
  101. // Get the data.
  102. int32_t nullValueOffset = trie->dataNullOffset;
  103. if (nullValueOffset >= trie->dataLength) {
  104. nullValueOffset = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
  105. }
  106. switch (valueWidth) {
  107. case UCPTRIE_VALUE_BITS_16:
  108. trie->data.ptr16 = p16;
  109. trie->nullValue = trie->data.ptr16[nullValueOffset];
  110. break;
  111. case UCPTRIE_VALUE_BITS_32:
  112. trie->data.ptr32 = (const uint32_t *)p16;
  113. trie->nullValue = trie->data.ptr32[nullValueOffset];
  114. break;
  115. case UCPTRIE_VALUE_BITS_8:
  116. trie->data.ptr8 = (const uint8_t *)p16;
  117. trie->nullValue = trie->data.ptr8[nullValueOffset];
  118. break;
  119. default:
  120. // Unreachable because valueWidth was checked above.
  121. *pErrorCode = U_INVALID_FORMAT_ERROR;
  122. return nullptr;
  123. }
  124. if (pActualLength != nullptr) {
  125. *pActualLength = actualLength;
  126. }
  127. return trie;
  128. }
  129. U_CAPI void U_EXPORT2
  130. ucptrie_close(UCPTrie *trie) {
  131. uprv_free(trie);
  132. }
  133. U_CAPI UCPTrieType U_EXPORT2
  134. ucptrie_getType(const UCPTrie *trie) {
  135. return (UCPTrieType)trie->type;
  136. }
  137. U_CAPI UCPTrieValueWidth U_EXPORT2
  138. ucptrie_getValueWidth(const UCPTrie *trie) {
  139. return (UCPTrieValueWidth)trie->valueWidth;
  140. }
  141. U_CAPI int32_t U_EXPORT2
  142. ucptrie_internalSmallIndex(const UCPTrie *trie, UChar32 c) {
  143. int32_t i1 = c >> UCPTRIE_SHIFT_1;
  144. if (trie->type == UCPTRIE_TYPE_FAST) {
  145. U_ASSERT(0xffff < c && c < trie->highStart);
  146. i1 += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH;
  147. } else {
  148. U_ASSERT((uint32_t)c < (uint32_t)trie->highStart && trie->highStart > UCPTRIE_SMALL_LIMIT);
  149. i1 += UCPTRIE_SMALL_INDEX_LENGTH;
  150. }
  151. int32_t i3Block = trie->index[
  152. (int32_t)trie->index[i1] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK)];
  153. int32_t i3 = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK;
  154. int32_t dataBlock;
  155. if ((i3Block & 0x8000) == 0) {
  156. // 16-bit indexes
  157. dataBlock = trie->index[i3Block + i3];
  158. } else {
  159. // 18-bit indexes stored in groups of 9 entries per 8 indexes.
  160. i3Block = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3);
  161. i3 &= 7;
  162. dataBlock = ((int32_t)trie->index[i3Block++] << (2 + (2 * i3))) & 0x30000;
  163. dataBlock |= trie->index[i3Block + i3];
  164. }
  165. return dataBlock + (c & UCPTRIE_SMALL_DATA_MASK);
  166. }
  167. U_CAPI int32_t U_EXPORT2
  168. ucptrie_internalSmallU8Index(const UCPTrie *trie, int32_t lt1, uint8_t t2, uint8_t t3) {
  169. UChar32 c = (lt1 << 12) | (t2 << 6) | t3;
  170. if (c >= trie->highStart) {
  171. // Possible because the UTF-8 macro compares with shifted12HighStart which may be higher.
  172. return trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
  173. }
  174. return ucptrie_internalSmallIndex(trie, c);
  175. }
  176. U_CAPI int32_t U_EXPORT2
  177. ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c,
  178. const uint8_t *start, const uint8_t *src) {
  179. int32_t i, length;
  180. // Support 64-bit pointers by avoiding cast of arbitrary difference.
  181. if ((src - start) <= 7) {
  182. i = length = (int32_t)(src - start);
  183. } else {
  184. i = length = 7;
  185. start = src - 7;
  186. }
  187. c = utf8_prevCharSafeBody(start, 0, &i, c, -1);
  188. i = length - i; // Number of bytes read backward from src.
  189. int32_t idx = _UCPTRIE_CP_INDEX(trie, 0xffff, c);
  190. return (idx << 3) | i;
  191. }
  192. namespace {
  193. inline uint32_t getValue(UCPTrieData data, UCPTrieValueWidth valueWidth, int32_t dataIndex) {
  194. switch (valueWidth) {
  195. case UCPTRIE_VALUE_BITS_16:
  196. return data.ptr16[dataIndex];
  197. case UCPTRIE_VALUE_BITS_32:
  198. return data.ptr32[dataIndex];
  199. case UCPTRIE_VALUE_BITS_8:
  200. return data.ptr8[dataIndex];
  201. default:
  202. // Unreachable if the trie is properly initialized.
  203. return 0xffffffff;
  204. }
  205. }
  206. } // namespace
  207. U_CAPI uint32_t U_EXPORT2
  208. ucptrie_get(const UCPTrie *trie, UChar32 c) {
  209. int32_t dataIndex;
  210. if ((uint32_t)c <= 0x7f) {
  211. // linear ASCII
  212. dataIndex = c;
  213. } else {
  214. UChar32 fastMax = trie->type == UCPTRIE_TYPE_FAST ? 0xffff : UCPTRIE_SMALL_MAX;
  215. dataIndex = _UCPTRIE_CP_INDEX(trie, fastMax, c);
  216. }
  217. return getValue(trie->data, (UCPTrieValueWidth)trie->valueWidth, dataIndex);
  218. }
  219. namespace {
  220. constexpr int32_t MAX_UNICODE = 0x10ffff;
  221. inline uint32_t maybeFilterValue(uint32_t value, uint32_t trieNullValue, uint32_t nullValue,
  222. UCPMapValueFilter *filter, const void *context) {
  223. if (value == trieNullValue) {
  224. value = nullValue;
  225. } else if (filter != nullptr) {
  226. value = filter(context, value);
  227. }
  228. return value;
  229. }
  230. UChar32 getRange(const void *t, UChar32 start,
  231. UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
  232. if (static_cast<uint32_t>(start) > MAX_UNICODE) {
  233. return U_SENTINEL;
  234. }
  235. const UCPTrie *trie = reinterpret_cast<const UCPTrie *>(t);
  236. UCPTrieValueWidth valueWidth = static_cast<UCPTrieValueWidth>(trie->valueWidth);
  237. if (start >= trie->highStart) {
  238. if (pValue != nullptr) {
  239. int32_t di = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
  240. uint32_t value = getValue(trie->data, valueWidth, di);
  241. if (filter != nullptr) { value = filter(context, value); }
  242. *pValue = value;
  243. }
  244. return MAX_UNICODE;
  245. }
  246. uint32_t nullValue = trie->nullValue;
  247. if (filter != nullptr) { nullValue = filter(context, nullValue); }
  248. const uint16_t *index = trie->index;
  249. int32_t prevI3Block = -1;
  250. int32_t prevBlock = -1;
  251. UChar32 c = start;
  252. uint32_t trieValue, value = nullValue;
  253. bool haveValue = false;
  254. do {
  255. int32_t i3Block;
  256. int32_t i3;
  257. int32_t i3BlockLength;
  258. int32_t dataBlockLength;
  259. if (c <= 0xffff && (trie->type == UCPTRIE_TYPE_FAST || c <= UCPTRIE_SMALL_MAX)) {
  260. i3Block = 0;
  261. i3 = c >> UCPTRIE_FAST_SHIFT;
  262. i3BlockLength = trie->type == UCPTRIE_TYPE_FAST ?
  263. UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH;
  264. dataBlockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
  265. } else {
  266. // Use the multi-stage index.
  267. int32_t i1 = c >> UCPTRIE_SHIFT_1;
  268. if (trie->type == UCPTRIE_TYPE_FAST) {
  269. U_ASSERT(0xffff < c && c < trie->highStart);
  270. i1 += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH;
  271. } else {
  272. U_ASSERT(c < trie->highStart && trie->highStart > UCPTRIE_SMALL_LIMIT);
  273. i1 += UCPTRIE_SMALL_INDEX_LENGTH;
  274. }
  275. i3Block = trie->index[
  276. static_cast<int32_t>(trie->index[i1]) + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK)];
  277. if (i3Block == prevI3Block && (c - start) >= UCPTRIE_CP_PER_INDEX_2_ENTRY) {
  278. // The index-3 block is the same as the previous one, and filled with value.
  279. U_ASSERT((c & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0);
  280. c += UCPTRIE_CP_PER_INDEX_2_ENTRY;
  281. continue;
  282. }
  283. prevI3Block = i3Block;
  284. if (i3Block == trie->index3NullOffset) {
  285. // This is the index-3 null block.
  286. if (haveValue) {
  287. if (nullValue != value) {
  288. return c - 1;
  289. }
  290. } else {
  291. trieValue = trie->nullValue;
  292. value = nullValue;
  293. if (pValue != nullptr) { *pValue = nullValue; }
  294. haveValue = true;
  295. }
  296. prevBlock = trie->dataNullOffset;
  297. c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);
  298. continue;
  299. }
  300. i3 = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK;
  301. i3BlockLength = UCPTRIE_INDEX_3_BLOCK_LENGTH;
  302. dataBlockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
  303. }
  304. // Enumerate data blocks for one index-3 block.
  305. do {
  306. int32_t block;
  307. if ((i3Block & 0x8000) == 0) {
  308. block = index[i3Block + i3];
  309. } else {
  310. // 18-bit indexes stored in groups of 9 entries per 8 indexes.
  311. int32_t group = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3);
  312. int32_t gi = i3 & 7;
  313. block = (static_cast<int32_t>(index[group++]) << (2 + (2 * gi))) & 0x30000;
  314. block |= index[group + gi];
  315. }
  316. if (block == prevBlock && (c - start) >= dataBlockLength) {
  317. // The block is the same as the previous one, and filled with value.
  318. U_ASSERT((c & (dataBlockLength - 1)) == 0);
  319. c += dataBlockLength;
  320. } else {
  321. int32_t dataMask = dataBlockLength - 1;
  322. prevBlock = block;
  323. if (block == trie->dataNullOffset) {
  324. // This is the data null block.
  325. if (haveValue) {
  326. if (nullValue != value) {
  327. return c - 1;
  328. }
  329. } else {
  330. trieValue = trie->nullValue;
  331. value = nullValue;
  332. if (pValue != nullptr) { *pValue = nullValue; }
  333. haveValue = true;
  334. }
  335. c = (c + dataBlockLength) & ~dataMask;
  336. } else {
  337. int32_t di = block + (c & dataMask);
  338. uint32_t trieValue2 = getValue(trie->data, valueWidth, di);
  339. if (haveValue) {
  340. if (trieValue2 != trieValue) {
  341. if (filter == nullptr ||
  342. maybeFilterValue(trieValue2, trie->nullValue, nullValue,
  343. filter, context) != value) {
  344. return c - 1;
  345. }
  346. trieValue = trieValue2; // may or may not help
  347. }
  348. } else {
  349. trieValue = trieValue2;
  350. value = maybeFilterValue(trieValue2, trie->nullValue, nullValue,
  351. filter, context);
  352. if (pValue != nullptr) { *pValue = value; }
  353. haveValue = true;
  354. }
  355. while ((++c & dataMask) != 0) {
  356. trieValue2 = getValue(trie->data, valueWidth, ++di);
  357. if (trieValue2 != trieValue) {
  358. if (filter == nullptr ||
  359. maybeFilterValue(trieValue2, trie->nullValue, nullValue,
  360. filter, context) != value) {
  361. return c - 1;
  362. }
  363. trieValue = trieValue2; // may or may not help
  364. }
  365. }
  366. }
  367. }
  368. } while (++i3 < i3BlockLength);
  369. } while (c < trie->highStart);
  370. U_ASSERT(haveValue);
  371. int32_t di = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
  372. uint32_t highValue = getValue(trie->data, valueWidth, di);
  373. if (maybeFilterValue(highValue, trie->nullValue, nullValue,
  374. filter, context) != value) {
  375. return c - 1;
  376. } else {
  377. return MAX_UNICODE;
  378. }
  379. }
  380. } // namespace
  381. U_CFUNC UChar32
  382. ucptrie_internalGetRange(UCPTrieGetRange *getRange,
  383. const void *trie, UChar32 start,
  384. UCPMapRangeOption option, uint32_t surrogateValue,
  385. UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
  386. if (option == UCPMAP_RANGE_NORMAL) {
  387. return getRange(trie, start, filter, context, pValue);
  388. }
  389. uint32_t value;
  390. if (pValue == nullptr) {
  391. // We need to examine the range value even if the caller does not want it.
  392. pValue = &value;
  393. }
  394. UChar32 surrEnd = option == UCPMAP_RANGE_FIXED_ALL_SURROGATES ? 0xdfff : 0xdbff;
  395. UChar32 end = getRange(trie, start, filter, context, pValue);
  396. if (end < 0xd7ff || start > surrEnd) {
  397. return end;
  398. }
  399. // The range overlaps with surrogates, or ends just before the first one.
  400. if (*pValue == surrogateValue) {
  401. if (end >= surrEnd) {
  402. // Surrogates followed by a non-surrogateValue range,
  403. // or surrogates are part of a larger surrogateValue range.
  404. return end;
  405. }
  406. } else {
  407. if (start <= 0xd7ff) {
  408. return 0xd7ff; // Non-surrogateValue range ends before surrogateValue surrogates.
  409. }
  410. // Start is a surrogate with a non-surrogateValue code *unit* value.
  411. // Return a surrogateValue code *point* range.
  412. *pValue = surrogateValue;
  413. if (end > surrEnd) {
  414. return surrEnd; // Surrogate range ends before non-surrogateValue rest of range.
  415. }
  416. }
  417. // See if the surrogateValue surrogate range can be merged with
  418. // an immediately following range.
  419. uint32_t value2;
  420. UChar32 end2 = getRange(trie, surrEnd + 1, filter, context, &value2);
  421. if (value2 == surrogateValue) {
  422. return end2;
  423. }
  424. return surrEnd;
  425. }
  426. U_CAPI UChar32 U_EXPORT2
  427. ucptrie_getRange(const UCPTrie *trie, UChar32 start,
  428. UCPMapRangeOption option, uint32_t surrogateValue,
  429. UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
  430. return ucptrie_internalGetRange(getRange, trie, start,
  431. option, surrogateValue,
  432. filter, context, pValue);
  433. }
  434. U_CAPI int32_t U_EXPORT2
  435. ucptrie_toBinary(const UCPTrie *trie,
  436. void *data, int32_t capacity,
  437. UErrorCode *pErrorCode) {
  438. if (U_FAILURE(*pErrorCode)) {
  439. return 0;
  440. }
  441. UCPTrieType type = (UCPTrieType)trie->type;
  442. UCPTrieValueWidth valueWidth = (UCPTrieValueWidth)trie->valueWidth;
  443. if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type ||
  444. valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth ||
  445. capacity < 0 ||
  446. (capacity > 0 && (data == nullptr || (U_POINTER_MASK_LSB(data, 3) != 0)))) {
  447. *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
  448. return 0;
  449. }
  450. int32_t length = (int32_t)sizeof(UCPTrieHeader) + trie->indexLength * 2;
  451. switch (valueWidth) {
  452. case UCPTRIE_VALUE_BITS_16:
  453. length += trie->dataLength * 2;
  454. break;
  455. case UCPTRIE_VALUE_BITS_32:
  456. length += trie->dataLength * 4;
  457. break;
  458. case UCPTRIE_VALUE_BITS_8:
  459. length += trie->dataLength;
  460. break;
  461. default:
  462. // unreachable
  463. break;
  464. }
  465. if (capacity < length) {
  466. *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
  467. return length;
  468. }
  469. char *bytes = (char *)data;
  470. UCPTrieHeader *header = (UCPTrieHeader *)bytes;
  471. header->signature = UCPTRIE_SIG; // "Tri3"
  472. header->options = (uint16_t)(
  473. ((trie->dataLength & 0xf0000) >> 4) |
  474. ((trie->dataNullOffset & 0xf0000) >> 8) |
  475. (trie->type << 6) |
  476. valueWidth);
  477. header->indexLength = (uint16_t)trie->indexLength;
  478. header->dataLength = (uint16_t)trie->dataLength;
  479. header->index3NullOffset = trie->index3NullOffset;
  480. header->dataNullOffset = (uint16_t)trie->dataNullOffset;
  481. header->shiftedHighStart = trie->highStart >> UCPTRIE_SHIFT_2;
  482. bytes += sizeof(UCPTrieHeader);
  483. uprv_memcpy(bytes, trie->index, trie->indexLength * 2);
  484. bytes += trie->indexLength * 2;
  485. switch (valueWidth) {
  486. case UCPTRIE_VALUE_BITS_16:
  487. uprv_memcpy(bytes, trie->data.ptr16, trie->dataLength * 2);
  488. break;
  489. case UCPTRIE_VALUE_BITS_32:
  490. uprv_memcpy(bytes, trie->data.ptr32, trie->dataLength * 4);
  491. break;
  492. case UCPTRIE_VALUE_BITS_8:
  493. uprv_memcpy(bytes, trie->data.ptr8, trie->dataLength);
  494. break;
  495. default:
  496. // unreachable
  497. break;
  498. }
  499. return length;
  500. }
  501. namespace {
  502. #ifdef UCPTRIE_DEBUG
  503. long countNull(const UCPTrie *trie) {
  504. uint32_t nullValue=trie->nullValue;
  505. int32_t length=trie->dataLength;
  506. long count=0;
  507. switch (trie->valueWidth) {
  508. case UCPTRIE_VALUE_BITS_16:
  509. for(int32_t i=0; i<length; ++i) {
  510. if(trie->data.ptr16[i]==nullValue) { ++count; }
  511. }
  512. break;
  513. case UCPTRIE_VALUE_BITS_32:
  514. for(int32_t i=0; i<length; ++i) {
  515. if(trie->data.ptr32[i]==nullValue) { ++count; }
  516. }
  517. break;
  518. case UCPTRIE_VALUE_BITS_8:
  519. for(int32_t i=0; i<length; ++i) {
  520. if(trie->data.ptr8[i]==nullValue) { ++count; }
  521. }
  522. break;
  523. default:
  524. // unreachable
  525. break;
  526. }
  527. return count;
  528. }
  529. U_CFUNC void
  530. ucptrie_printLengths(const UCPTrie *trie, const char *which) {
  531. long indexLength=trie->indexLength;
  532. long dataLength=(long)trie->dataLength;
  533. long totalLength=(long)sizeof(UCPTrieHeader)+indexLength*2+
  534. dataLength*(trie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 2 :
  535. trie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 4 : 1);
  536. printf("**UCPTrieLengths(%s %s)** index:%6ld data:%6ld countNull:%6ld serialized:%6ld\n",
  537. which, trie->name, indexLength, dataLength, countNull(trie), totalLength);
  538. }
  539. #endif
  540. } // namespace
  541. // UCPMap ----
  542. // Initially, this is the same as UCPTrie. This may well change.
  543. U_CAPI uint32_t U_EXPORT2
  544. ucpmap_get(const UCPMap *map, UChar32 c) {
  545. return ucptrie_get(reinterpret_cast<const UCPTrie *>(map), c);
  546. }
  547. U_CAPI UChar32 U_EXPORT2
  548. ucpmap_getRange(const UCPMap *map, UChar32 start,
  549. UCPMapRangeOption option, uint32_t surrogateValue,
  550. UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
  551. return ucptrie_getRange(reinterpret_cast<const UCPTrie *>(map), start,
  552. option, surrogateValue,
  553. filter, context, pValue);
  554. }