utrie2.cpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. ******************************************************************************
  5. *
  6. * Copyright (C) 2001-2014, International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. ******************************************************************************
  10. * file name: utrie2.cpp
  11. * encoding: UTF-8
  12. * tab size: 8 (not used)
  13. * indentation:4
  14. *
  15. * created on: 2008aug16 (starting from a copy of utrie.c)
  16. * created by: Markus W. Scherer
  17. *
  18. * This is a common implementation of a Unicode trie.
  19. * It is a kind of compressed, serializable table of 16- or 32-bit values associated with
  20. * Unicode code points (0..0x10ffff).
  21. * This is the second common version of a Unicode trie (hence the name UTrie2).
  22. * See utrie2.h for a comparison.
  23. *
  24. * This file contains only the runtime and enumeration code, for read-only access.
  25. * See utrie2_builder.c for the builder code.
  26. */
  27. #include "unicode/utypes.h"
  28. #ifdef UCPTRIE_DEBUG
  29. #include "unicode/umutablecptrie.h"
  30. #endif
  31. #include "unicode/utf.h"
  32. #include "unicode/utf8.h"
  33. #include "unicode/utf16.h"
  34. #include "cmemory.h"
  35. #include "utrie2.h"
  36. #include "utrie2_impl.h"
  37. #include "uassert.h"
  38. /* Public UTrie2 API implementation ----------------------------------------- */
  39. static uint32_t
  40. get32(const UNewTrie2 *trie, UChar32 c, UBool fromLSCP) {
  41. int32_t i2, block;
  42. if(c>=trie->highStart && (!U_IS_LEAD(c) || fromLSCP)) {
  43. return trie->data[trie->dataLength-UTRIE2_DATA_GRANULARITY];
  44. }
  45. if(U_IS_LEAD(c) && fromLSCP) {
  46. i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+
  47. (c>>UTRIE2_SHIFT_2);
  48. } else {
  49. i2=trie->index1[c>>UTRIE2_SHIFT_1]+
  50. ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
  51. }
  52. block=trie->index2[i2];
  53. return trie->data[block+(c&UTRIE2_DATA_MASK)];
  54. }
  55. U_CAPI uint32_t U_EXPORT2
  56. utrie2_get32(const UTrie2 *trie, UChar32 c) {
  57. if(trie->data16!=nullptr) {
  58. return UTRIE2_GET16(trie, c);
  59. } else if(trie->data32!=nullptr) {
  60. return UTRIE2_GET32(trie, c);
  61. } else if((uint32_t)c>0x10ffff) {
  62. return trie->errorValue;
  63. } else {
  64. return get32(trie->newTrie, c, true);
  65. }
  66. }
  67. U_CAPI uint32_t U_EXPORT2
  68. utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c) {
  69. if(!U_IS_LEAD(c)) {
  70. return trie->errorValue;
  71. }
  72. if(trie->data16!=nullptr) {
  73. return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c);
  74. } else if(trie->data32!=nullptr) {
  75. return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c);
  76. } else {
  77. return get32(trie->newTrie, c, false);
  78. }
  79. }
  80. static inline int32_t
  81. u8Index(const UTrie2 *trie, UChar32 c, int32_t i) {
  82. int32_t idx=
  83. _UTRIE2_INDEX_FROM_CP(
  84. trie,
  85. trie->data32==nullptr ? trie->indexLength : 0,
  86. c);
  87. return (idx<<3)|i;
  88. }
  89. U_CAPI int32_t U_EXPORT2
  90. utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c,
  91. const uint8_t *src, const uint8_t *limit) {
  92. int32_t i, length;
  93. i=0;
  94. /* support 64-bit pointers by avoiding cast of arbitrary difference */
  95. if((limit-src)<=7) {
  96. length=(int32_t)(limit-src);
  97. } else {
  98. length=7;
  99. }
  100. c=utf8_nextCharSafeBody(src, &i, length, c, -1);
  101. return u8Index(trie, c, i);
  102. }
  103. U_CAPI int32_t U_EXPORT2
  104. utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c,
  105. const uint8_t *start, const uint8_t *src) {
  106. int32_t i, length;
  107. /* support 64-bit pointers by avoiding cast of arbitrary difference */
  108. if((src-start)<=7) {
  109. i=length=(int32_t)(src-start);
  110. } else {
  111. i=length=7;
  112. start=src-7;
  113. }
  114. c=utf8_prevCharSafeBody(start, 0, &i, c, -1);
  115. i=length-i; /* number of bytes read backward from src */
  116. return u8Index(trie, c, i);
  117. }
  118. U_CAPI UTrie2 * U_EXPORT2
  119. utrie2_openFromSerialized(UTrie2ValueBits valueBits,
  120. const void *data, int32_t length, int32_t *pActualLength,
  121. UErrorCode *pErrorCode) {
  122. const UTrie2Header *header;
  123. const uint16_t *p16;
  124. int32_t actualLength;
  125. UTrie2 tempTrie;
  126. UTrie2 *trie;
  127. if(U_FAILURE(*pErrorCode)) {
  128. return nullptr;
  129. }
  130. if( length<=0 || (U_POINTER_MASK_LSB(data, 3)!=0) ||
  131. valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits
  132. ) {
  133. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  134. return nullptr;
  135. }
  136. /* enough data for a trie header? */
  137. if(length<(int32_t)sizeof(UTrie2Header)) {
  138. *pErrorCode=U_INVALID_FORMAT_ERROR;
  139. return nullptr;
  140. }
  141. /* check the signature */
  142. header=(const UTrie2Header *)data;
  143. if(header->signature!=UTRIE2_SIG) {
  144. *pErrorCode=U_INVALID_FORMAT_ERROR;
  145. return nullptr;
  146. }
  147. /* get the options */
  148. if(valueBits!=(UTrie2ValueBits)(header->options&UTRIE2_OPTIONS_VALUE_BITS_MASK)) {
  149. *pErrorCode=U_INVALID_FORMAT_ERROR;
  150. return nullptr;
  151. }
  152. /* get the length values and offsets */
  153. uprv_memset(&tempTrie, 0, sizeof(tempTrie));
  154. tempTrie.indexLength=header->indexLength;
  155. tempTrie.dataLength=header->shiftedDataLength<<UTRIE2_INDEX_SHIFT;
  156. tempTrie.index2NullOffset=header->index2NullOffset;
  157. tempTrie.dataNullOffset=header->dataNullOffset;
  158. tempTrie.highStart=header->shiftedHighStart<<UTRIE2_SHIFT_1;
  159. tempTrie.highValueIndex=tempTrie.dataLength-UTRIE2_DATA_GRANULARITY;
  160. if(valueBits==UTRIE2_16_VALUE_BITS) {
  161. tempTrie.highValueIndex+=tempTrie.indexLength;
  162. }
  163. /* calculate the actual length */
  164. actualLength=(int32_t)sizeof(UTrie2Header)+tempTrie.indexLength*2;
  165. if(valueBits==UTRIE2_16_VALUE_BITS) {
  166. actualLength+=tempTrie.dataLength*2;
  167. } else {
  168. actualLength+=tempTrie.dataLength*4;
  169. }
  170. if(length<actualLength) {
  171. *pErrorCode=U_INVALID_FORMAT_ERROR; /* not enough bytes */
  172. return nullptr;
  173. }
  174. /* allocate the trie */
  175. trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
  176. if(trie==nullptr) {
  177. *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
  178. return nullptr;
  179. }
  180. uprv_memcpy(trie, &tempTrie, sizeof(tempTrie));
  181. trie->memory=(uint32_t *)data;
  182. trie->length=actualLength;
  183. trie->isMemoryOwned=false;
  184. #ifdef UTRIE2_DEBUG
  185. trie->name="fromSerialized";
  186. #endif
  187. /* set the pointers to its index and data arrays */
  188. p16=(const uint16_t *)(header+1);
  189. trie->index=p16;
  190. p16+=trie->indexLength;
  191. /* get the data */
  192. switch(valueBits) {
  193. case UTRIE2_16_VALUE_BITS:
  194. trie->data16=p16;
  195. trie->data32=nullptr;
  196. trie->initialValue=trie->index[trie->dataNullOffset];
  197. trie->errorValue=trie->data16[UTRIE2_BAD_UTF8_DATA_OFFSET];
  198. break;
  199. case UTRIE2_32_VALUE_BITS:
  200. trie->data16=nullptr;
  201. trie->data32=(const uint32_t *)p16;
  202. trie->initialValue=trie->data32[trie->dataNullOffset];
  203. trie->errorValue=trie->data32[UTRIE2_BAD_UTF8_DATA_OFFSET];
  204. break;
  205. default:
  206. *pErrorCode=U_INVALID_FORMAT_ERROR;
  207. return nullptr;
  208. }
  209. if(pActualLength!=nullptr) {
  210. *pActualLength=actualLength;
  211. }
  212. return trie;
  213. }
  214. U_CAPI UTrie2 * U_EXPORT2
  215. utrie2_openDummy(UTrie2ValueBits valueBits,
  216. uint32_t initialValue, uint32_t errorValue,
  217. UErrorCode *pErrorCode) {
  218. UTrie2 *trie;
  219. UTrie2Header *header;
  220. uint32_t *p;
  221. uint16_t *dest16;
  222. int32_t indexLength, dataLength, length, i;
  223. int32_t dataMove; /* >0 if the data is moved to the end of the index array */
  224. if(U_FAILURE(*pErrorCode)) {
  225. return nullptr;
  226. }
  227. if(valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits) {
  228. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  229. return nullptr;
  230. }
  231. /* calculate the total length of the dummy trie data */
  232. indexLength=UTRIE2_INDEX_1_OFFSET;
  233. dataLength=UTRIE2_DATA_START_OFFSET+UTRIE2_DATA_GRANULARITY;
  234. length=(int32_t)sizeof(UTrie2Header)+indexLength*2;
  235. if(valueBits==UTRIE2_16_VALUE_BITS) {
  236. length+=dataLength*2;
  237. } else {
  238. length+=dataLength*4;
  239. }
  240. /* allocate the trie */
  241. trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
  242. if(trie==nullptr) {
  243. *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
  244. return nullptr;
  245. }
  246. uprv_memset(trie, 0, sizeof(UTrie2));
  247. trie->memory=uprv_malloc(length);
  248. if(trie->memory==nullptr) {
  249. uprv_free(trie);
  250. *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
  251. return nullptr;
  252. }
  253. trie->length=length;
  254. trie->isMemoryOwned=true;
  255. /* set the UTrie2 fields */
  256. if(valueBits==UTRIE2_16_VALUE_BITS) {
  257. dataMove=indexLength;
  258. } else {
  259. dataMove=0;
  260. }
  261. trie->indexLength=indexLength;
  262. trie->dataLength=dataLength;
  263. trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET;
  264. trie->dataNullOffset=(uint16_t)dataMove;
  265. trie->initialValue=initialValue;
  266. trie->errorValue=errorValue;
  267. trie->highStart=0;
  268. trie->highValueIndex=dataMove+UTRIE2_DATA_START_OFFSET;
  269. #ifdef UTRIE2_DEBUG
  270. trie->name="dummy";
  271. #endif
  272. /* set the header fields */
  273. header=(UTrie2Header *)trie->memory;
  274. header->signature=UTRIE2_SIG; /* "Tri2" */
  275. header->options=(uint16_t)valueBits;
  276. header->indexLength=(uint16_t)indexLength;
  277. header->shiftedDataLength=(uint16_t)(dataLength>>UTRIE2_INDEX_SHIFT);
  278. header->index2NullOffset=(uint16_t)UTRIE2_INDEX_2_OFFSET;
  279. header->dataNullOffset=(uint16_t)dataMove;
  280. header->shiftedHighStart=0;
  281. /* fill the index and data arrays */
  282. dest16=(uint16_t *)(header+1);
  283. trie->index=dest16;
  284. /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT */
  285. for(i=0; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
  286. *dest16++=(uint16_t)(dataMove>>UTRIE2_INDEX_SHIFT); /* null data block */
  287. }
  288. /* write UTF-8 2-byte index-2 values, not right-shifted */
  289. for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */
  290. *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);
  291. }
  292. for(; i<(0xe0-0xc0); ++i) { /* C2..DF */
  293. *dest16++=(uint16_t)dataMove;
  294. }
  295. /* write the 16/32-bit data array */
  296. switch(valueBits) {
  297. case UTRIE2_16_VALUE_BITS:
  298. /* write 16-bit data values */
  299. trie->data16=dest16;
  300. trie->data32=nullptr;
  301. for(i=0; i<0x80; ++i) {
  302. *dest16++=(uint16_t)initialValue;
  303. }
  304. for(; i<0xc0; ++i) {
  305. *dest16++=(uint16_t)errorValue;
  306. }
  307. /* highValue and reserved values */
  308. for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) {
  309. *dest16++=(uint16_t)initialValue;
  310. }
  311. break;
  312. case UTRIE2_32_VALUE_BITS:
  313. /* write 32-bit data values */
  314. p=(uint32_t *)dest16;
  315. trie->data16=nullptr;
  316. trie->data32=p;
  317. for(i=0; i<0x80; ++i) {
  318. *p++=initialValue;
  319. }
  320. for(; i<0xc0; ++i) {
  321. *p++=errorValue;
  322. }
  323. /* highValue and reserved values */
  324. for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) {
  325. *p++=initialValue;
  326. }
  327. break;
  328. default:
  329. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  330. return nullptr;
  331. }
  332. return trie;
  333. }
  334. U_CAPI void U_EXPORT2
  335. utrie2_close(UTrie2 *trie) {
  336. if(trie!=nullptr) {
  337. if(trie->isMemoryOwned) {
  338. uprv_free(trie->memory);
  339. }
  340. if(trie->newTrie!=nullptr) {
  341. uprv_free(trie->newTrie->data);
  342. #ifdef UCPTRIE_DEBUG
  343. umutablecptrie_close(trie->newTrie->t3);
  344. #endif
  345. uprv_free(trie->newTrie);
  346. }
  347. uprv_free(trie);
  348. }
  349. }
  350. U_CAPI UBool U_EXPORT2
  351. utrie2_isFrozen(const UTrie2 *trie) {
  352. return trie->newTrie==nullptr;
  353. }
  354. U_CAPI int32_t U_EXPORT2
  355. utrie2_serialize(const UTrie2 *trie,
  356. void *data, int32_t capacity,
  357. UErrorCode *pErrorCode) {
  358. /* argument check */
  359. if(U_FAILURE(*pErrorCode)) {
  360. return 0;
  361. }
  362. if( trie==nullptr || trie->memory==nullptr || trie->newTrie!=nullptr ||
  363. capacity<0 || (capacity>0 && (data==nullptr || (U_POINTER_MASK_LSB(data, 3)!=0)))
  364. ) {
  365. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  366. return 0;
  367. }
  368. if(capacity>=trie->length) {
  369. uprv_memcpy(data, trie->memory, trie->length);
  370. } else {
  371. *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  372. }
  373. return trie->length;
  374. }
  375. /* enumeration -------------------------------------------------------------- */
  /* Smaller of two values; yields b when they compare equal. Arguments are evaluated twice. */
  #define MIN_VALUE(a, b) ((a)<(b) ? (a) : (b))
  377. /* default UTrie2EnumValue() returns the input value itself */
  378. static uint32_t U_CALLCONV
  379. enumSameValue(const void * /*context*/, uint32_t value) {
  380. return value;
  381. }
  382. /**
  383. * Enumerate all ranges of code points with the same relevant values.
  384. * The values are transformed from the raw trie entries by the enumValue function.
  385. *
  386. * Currently requires start<limit and both start and limit must be multiples
  387. * of UTRIE2_DATA_BLOCK_LENGTH.
  388. *
  389. * Optimizations:
  390. * - Skip a whole block if we know that it is filled with a single value,
  391. * and it is the same as we visited just before.
  392. * - Handle the null block specially because we know a priori that it is filled
  393. * with a single value.
  394. */
  395. static void
  396. enumEitherTrie(const UTrie2 *trie,
  397. UChar32 start, UChar32 limit,
  398. UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) {
  399. const uint32_t *data32;
  400. const uint16_t *idx;
  401. uint32_t value, prevValue, initialValue;
  402. UChar32 c, prev, highStart;
  403. int32_t j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock;
  404. if(enumRange==nullptr) {
  405. return;
  406. }
  407. if(enumValue==nullptr) {
  408. enumValue=enumSameValue;
  409. }
  410. if(trie->newTrie==nullptr) {
  411. /* frozen trie */
  412. idx=trie->index;
  413. U_ASSERT(idx!=nullptr); /* the following code assumes trie->newTrie is not nullptr when idx is nullptr */
  414. data32=trie->data32;
  415. index2NullOffset=trie->index2NullOffset;
  416. nullBlock=trie->dataNullOffset;
  417. } else {
  418. /* unfrozen, mutable trie */
  419. idx=nullptr;
  420. data32=trie->newTrie->data;
  421. U_ASSERT(data32!=nullptr); /* the following code assumes idx is not nullptr when data32 is nullptr */
  422. index2NullOffset=trie->newTrie->index2NullOffset;
  423. nullBlock=trie->newTrie->dataNullOffset;
  424. }
  425. highStart=trie->highStart;
  426. /* get the enumeration value that corresponds to an initial-value trie data entry */
  427. initialValue=enumValue(context, trie->initialValue);
  428. /* set variables for previous range */
  429. prevI2Block=-1;
  430. prevBlock=-1;
  431. prev=start;
  432. prevValue=0;
  433. /* enumerate index-2 blocks */
  434. for(c=start; c<limit && c<highStart;) {
  435. /* Code point limit for iterating inside this i2Block. */
  436. UChar32 tempLimit=c+UTRIE2_CP_PER_INDEX_1_ENTRY;
  437. if(limit<tempLimit) {
  438. tempLimit=limit;
  439. }
  440. if(c<=0xffff) {
  441. if(!U_IS_SURROGATE(c)) {
  442. i2Block=c>>UTRIE2_SHIFT_2;
  443. } else if(U_IS_SURROGATE_LEAD(c)) {
  444. /*
  445. * Enumerate values for lead surrogate code points, not code units:
  446. * This special block has half the normal length.
  447. */
  448. i2Block=UTRIE2_LSCP_INDEX_2_OFFSET;
  449. tempLimit=MIN_VALUE(0xdc00, limit);
  450. } else {
  451. /*
  452. * Switch back to the normal part of the index-2 table.
  453. * Enumerate the second half of the surrogates block.
  454. */
  455. i2Block=0xd800>>UTRIE2_SHIFT_2;
  456. tempLimit=MIN_VALUE(0xe000, limit);
  457. }
  458. } else {
  459. /* supplementary code points */
  460. if(idx!=nullptr) {
  461. i2Block=idx[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+
  462. (c>>UTRIE2_SHIFT_1)];
  463. } else {
  464. i2Block=trie->newTrie->index1[c>>UTRIE2_SHIFT_1];
  465. }
  466. if(i2Block==prevI2Block && (c-prev)>=UTRIE2_CP_PER_INDEX_1_ENTRY) {
  467. /*
  468. * The index-2 block is the same as the previous one, and filled with prevValue.
  469. * Only possible for supplementary code points because the linear-BMP index-2
  470. * table creates unique i2Block values.
  471. */
  472. c+=UTRIE2_CP_PER_INDEX_1_ENTRY;
  473. continue;
  474. }
  475. }
  476. prevI2Block=i2Block;
  477. if(i2Block==index2NullOffset) {
  478. /* this is the null index-2 block */
  479. if(prevValue!=initialValue) {
  480. if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
  481. return;
  482. }
  483. prevBlock=nullBlock;
  484. prev=c;
  485. prevValue=initialValue;
  486. }
  487. c+=UTRIE2_CP_PER_INDEX_1_ENTRY;
  488. } else {
  489. /* enumerate data blocks for one index-2 block */
  490. int32_t i2, i2Limit;
  491. i2=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
  492. if((c>>UTRIE2_SHIFT_1)==(tempLimit>>UTRIE2_SHIFT_1)) {
  493. i2Limit=(tempLimit>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
  494. } else {
  495. i2Limit=UTRIE2_INDEX_2_BLOCK_LENGTH;
  496. }
  497. for(; i2<i2Limit; ++i2) {
  498. if(idx!=nullptr) {
  499. block = static_cast<int32_t>(idx[i2Block + i2]) << UTRIE2_INDEX_SHIFT;
  500. } else {
  501. block=trie->newTrie->index2[i2Block+i2];
  502. }
  503. if(block==prevBlock && (c-prev)>=UTRIE2_DATA_BLOCK_LENGTH) {
  504. /* the block is the same as the previous one, and filled with prevValue */
  505. c+=UTRIE2_DATA_BLOCK_LENGTH;
  506. continue;
  507. }
  508. prevBlock=block;
  509. if(block==nullBlock) {
  510. /* this is the null data block */
  511. if(prevValue!=initialValue) {
  512. if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
  513. return;
  514. }
  515. prev=c;
  516. prevValue=initialValue;
  517. }
  518. c+=UTRIE2_DATA_BLOCK_LENGTH;
  519. } else {
  520. for(j=0; j<UTRIE2_DATA_BLOCK_LENGTH; ++j) {
  521. value=enumValue(context, data32!=nullptr ? data32[block+j] : idx[block+j]);
  522. if(value!=prevValue) {
  523. if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
  524. return;
  525. }
  526. prev=c;
  527. prevValue=value;
  528. }
  529. ++c;
  530. }
  531. }
  532. }
  533. }
  534. }
  535. if(c>limit) {
  536. c=limit; /* could be higher if in the index2NullOffset */
  537. } else if(c<limit) {
  538. /* c==highStart<limit */
  539. uint32_t highValue;
  540. if(idx!=nullptr) {
  541. highValue=
  542. data32!=nullptr ?
  543. data32[trie->highValueIndex] :
  544. idx[trie->highValueIndex];
  545. } else {
  546. highValue=trie->newTrie->data[trie->newTrie->dataLength-UTRIE2_DATA_GRANULARITY];
  547. }
  548. value=enumValue(context, highValue);
  549. if(value!=prevValue) {
  550. if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
  551. return;
  552. }
  553. prev=c;
  554. prevValue=value;
  555. }
  556. c=limit;
  557. }
  558. /* deliver last range */
  559. enumRange(context, prev, c-1, prevValue);
  560. }
  561. U_CAPI void U_EXPORT2
  562. utrie2_enum(const UTrie2 *trie,
  563. UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) {
  564. enumEitherTrie(trie, 0, 0x110000, enumValue, enumRange, context);
  565. }
  566. U_CAPI void U_EXPORT2
  567. utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead,
  568. UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange,
  569. const void *context) {
  570. if(!U16_IS_LEAD(lead)) {
  571. return;
  572. }
  573. lead=(lead-0xd7c0)<<10; /* start code point */
  574. enumEitherTrie(trie, lead, lead+0x400, enumValue, enumRange, context);
  575. }
  576. /* C++ convenience wrappers ------------------------------------------------- */
  577. U_NAMESPACE_BEGIN
  578. uint16_t BackwardUTrie2StringIterator::previous16() {
  579. codePointLimit=codePointStart;
  580. if(start>=codePointStart) {
  581. codePoint=U_SENTINEL;
  582. return static_cast<uint16_t>(trie->errorValue);
  583. }
  584. uint16_t result;
  585. UTRIE2_U16_PREV16(trie, start, codePointStart, codePoint, result);
  586. return result;
  587. }
  588. uint16_t ForwardUTrie2StringIterator::next16() {
  589. codePointStart=codePointLimit;
  590. if(codePointLimit==limit) {
  591. codePoint=U_SENTINEL;
  592. return static_cast<uint16_t>(trie->errorValue);
  593. }
  594. uint16_t result;
  595. UTRIE2_U16_NEXT16(trie, codePointLimit, limit, codePoint, result);
  596. return result;
  597. }
  598. U_NAMESPACE_END