uset.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. *
  6. * Copyright (C) 2002-2011, International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. *******************************************************************************
  10. * file name: uset.cpp
  11. * encoding: UTF-8
  12. * tab size: 8 (not used)
  13. * indentation:4
  14. *
  15. * created on: 2002mar07
  16. * created by: Markus W. Scherer
  17. *
  18. * There are functions to efficiently serialize a USet into an array of uint16_t
  19. * and functions to use such a serialized form efficiently without
  20. * instantiating a new USet.
  21. */
  22. #include "unicode/utypes.h"
  23. #include "unicode/uobject.h"
  24. #include "unicode/uset.h"
  25. #include "unicode/uniset.h"
  26. #include "cmemory.h"
  27. #include "unicode/ustring.h"
  28. #include "unicode/parsepos.h"
  29. U_NAMESPACE_USE
  30. U_CAPI USet* U_EXPORT2
  31. uset_openEmpty() {
  32. return (USet*) new UnicodeSet();
  33. }
  34. U_CAPI USet* U_EXPORT2
  35. uset_open(UChar32 start, UChar32 end) {
  36. return (USet*) new UnicodeSet(start, end);
  37. }
  38. U_CAPI void U_EXPORT2
  39. uset_close(USet* set) {
  40. delete (UnicodeSet*) set;
  41. }
  42. U_CAPI USet * U_EXPORT2
  43. uset_clone(const USet *set) {
  44. return (USet*) (((UnicodeSet*) set)->UnicodeSet::clone());
  45. }
  46. U_CAPI UBool U_EXPORT2
  47. uset_isFrozen(const USet *set) {
  48. return ((UnicodeSet*) set)->UnicodeSet::isFrozen();
  49. }
  50. U_CAPI void U_EXPORT2
  51. uset_freeze(USet *set) {
  52. ((UnicodeSet*) set)->UnicodeSet::freeze();
  53. }
  54. U_CAPI USet * U_EXPORT2
  55. uset_cloneAsThawed(const USet *set) {
  56. return (USet*) (((UnicodeSet*) set)->UnicodeSet::cloneAsThawed());
  57. }
  58. U_CAPI void U_EXPORT2
  59. uset_set(USet* set,
  60. UChar32 start, UChar32 end) {
  61. ((UnicodeSet*) set)->UnicodeSet::set(start, end);
  62. }
  63. U_CAPI void U_EXPORT2
  64. uset_addAll(USet* set, const USet *additionalSet) {
  65. ((UnicodeSet*) set)->UnicodeSet::addAll(*((const UnicodeSet*)additionalSet));
  66. }
  67. U_CAPI void U_EXPORT2
  68. uset_add(USet* set, UChar32 c) {
  69. ((UnicodeSet*) set)->UnicodeSet::add(c);
  70. }
  71. U_CAPI void U_EXPORT2
  72. uset_addRange(USet* set, UChar32 start, UChar32 end) {
  73. ((UnicodeSet*) set)->UnicodeSet::add(start, end);
  74. }
  75. U_CAPI void U_EXPORT2
  76. uset_addString(USet* set, const char16_t* str, int32_t strLen) {
  77. // UnicodeString handles -1 for strLen
  78. UnicodeString s(strLen<0, str, strLen);
  79. ((UnicodeSet*) set)->UnicodeSet::add(s);
  80. }
  81. U_CAPI void U_EXPORT2
  82. uset_addAllCodePoints(USet* set, const char16_t *str, int32_t strLen) {
  83. // UnicodeString handles -1 for strLen
  84. UnicodeString s(str, strLen);
  85. ((UnicodeSet*) set)->UnicodeSet::addAll(s);
  86. }
  87. U_CAPI void U_EXPORT2
  88. uset_remove(USet* set, UChar32 c) {
  89. ((UnicodeSet*) set)->UnicodeSet::remove(c);
  90. }
  91. U_CAPI void U_EXPORT2
  92. uset_removeRange(USet* set, UChar32 start, UChar32 end) {
  93. ((UnicodeSet*) set)->UnicodeSet::remove(start, end);
  94. }
  95. U_CAPI void U_EXPORT2
  96. uset_removeString(USet* set, const char16_t* str, int32_t strLen) {
  97. UnicodeString s(strLen==-1, str, strLen);
  98. ((UnicodeSet*) set)->UnicodeSet::remove(s);
  99. }
  100. U_CAPI void U_EXPORT2
  101. uset_removeAllCodePoints(USet *set, const char16_t *str, int32_t length) {
  102. UnicodeString s(length==-1, str, length);
  103. ((UnicodeSet*) set)->UnicodeSet::removeAll(s);
  104. }
  105. U_CAPI void U_EXPORT2
  106. uset_removeAll(USet* set, const USet* remove) {
  107. ((UnicodeSet*) set)->UnicodeSet::removeAll(*(const UnicodeSet*)remove);
  108. }
  109. U_CAPI void U_EXPORT2
  110. uset_retain(USet* set, UChar32 start, UChar32 end) {
  111. ((UnicodeSet*) set)->UnicodeSet::retain(start, end);
  112. }
  113. U_CAPI void U_EXPORT2
  114. uset_retainString(USet *set, const char16_t *str, int32_t length) {
  115. UnicodeString s(length==-1, str, length);
  116. ((UnicodeSet*) set)->UnicodeSet::retain(s);
  117. }
  118. U_CAPI void U_EXPORT2
  119. uset_retainAllCodePoints(USet *set, const char16_t *str, int32_t length) {
  120. UnicodeString s(length==-1, str, length);
  121. ((UnicodeSet*) set)->UnicodeSet::retainAll(s);
  122. }
  123. U_CAPI void U_EXPORT2
  124. uset_retainAll(USet* set, const USet* retain) {
  125. ((UnicodeSet*) set)->UnicodeSet::retainAll(*(const UnicodeSet*)retain);
  126. }
  127. U_CAPI void U_EXPORT2
  128. uset_compact(USet* set) {
  129. ((UnicodeSet*) set)->UnicodeSet::compact();
  130. }
  131. U_CAPI void U_EXPORT2
  132. uset_complement(USet* set) {
  133. ((UnicodeSet*) set)->UnicodeSet::complement();
  134. }
  135. U_CAPI void U_EXPORT2
  136. uset_complementRange(USet *set, UChar32 start, UChar32 end) {
  137. ((UnicodeSet*) set)->UnicodeSet::complement(start, end);
  138. }
  139. U_CAPI void U_EXPORT2
  140. uset_complementString(USet *set, const char16_t *str, int32_t length) {
  141. UnicodeString s(length==-1, str, length);
  142. ((UnicodeSet*) set)->UnicodeSet::complement(s);
  143. }
  144. U_CAPI void U_EXPORT2
  145. uset_complementAllCodePoints(USet *set, const char16_t *str, int32_t length) {
  146. UnicodeString s(length==-1, str, length);
  147. ((UnicodeSet*) set)->UnicodeSet::complementAll(s);
  148. }
  149. U_CAPI void U_EXPORT2
  150. uset_complementAll(USet* set, const USet* complement) {
  151. ((UnicodeSet*) set)->UnicodeSet::complementAll(*(const UnicodeSet*)complement);
  152. }
  153. U_CAPI void U_EXPORT2
  154. uset_clear(USet* set) {
  155. ((UnicodeSet*) set)->UnicodeSet::clear();
  156. }
  157. U_CAPI void U_EXPORT2
  158. uset_removeAllStrings(USet* set) {
  159. ((UnicodeSet*) set)->UnicodeSet::removeAllStrings();
  160. }
  161. U_CAPI UBool U_EXPORT2
  162. uset_isEmpty(const USet* set) {
  163. return ((const UnicodeSet*) set)->UnicodeSet::isEmpty();
  164. }
  165. U_CAPI UBool U_EXPORT2
  166. uset_hasStrings(const USet* set) {
  167. return ((const UnicodeSet*) set)->UnicodeSet::hasStrings();
  168. }
  169. U_CAPI UBool U_EXPORT2
  170. uset_contains(const USet* set, UChar32 c) {
  171. return ((const UnicodeSet*) set)->UnicodeSet::contains(c);
  172. }
  173. U_CAPI UBool U_EXPORT2
  174. uset_containsRange(const USet* set, UChar32 start, UChar32 end) {
  175. return ((const UnicodeSet*) set)->UnicodeSet::contains(start, end);
  176. }
  177. U_CAPI UBool U_EXPORT2
  178. uset_containsString(const USet* set, const char16_t* str, int32_t strLen) {
  179. UnicodeString s(strLen==-1, str, strLen);
  180. return ((const UnicodeSet*) set)->UnicodeSet::contains(s);
  181. }
  182. U_CAPI UBool U_EXPORT2
  183. uset_containsAll(const USet* set1, const USet* set2) {
  184. return ((const UnicodeSet*) set1)->UnicodeSet::containsAll(* (const UnicodeSet*) set2);
  185. }
  186. U_CAPI UBool U_EXPORT2
  187. uset_containsAllCodePoints(const USet* set, const char16_t *str, int32_t strLen) {
  188. // Create a string alias, since nothing is being added to the set.
  189. UnicodeString s(strLen==-1, str, strLen);
  190. return ((const UnicodeSet*) set)->UnicodeSet::containsAll(s);
  191. }
  192. U_CAPI UBool U_EXPORT2
  193. uset_containsNone(const USet* set1, const USet* set2) {
  194. return ((const UnicodeSet*) set1)->UnicodeSet::containsNone(* (const UnicodeSet*) set2);
  195. }
  196. U_CAPI UBool U_EXPORT2
  197. uset_containsSome(const USet* set1, const USet* set2) {
  198. return ((const UnicodeSet*) set1)->UnicodeSet::containsSome(* (const UnicodeSet*) set2);
  199. }
  200. U_CAPI int32_t U_EXPORT2
  201. uset_span(const USet *set, const char16_t *s, int32_t length, USetSpanCondition spanCondition) {
  202. return ((UnicodeSet*) set)->UnicodeSet::span(s, length, spanCondition);
  203. }
  204. U_CAPI int32_t U_EXPORT2
  205. uset_spanBack(const USet *set, const char16_t *s, int32_t length, USetSpanCondition spanCondition) {
  206. return ((UnicodeSet*) set)->UnicodeSet::spanBack(s, length, spanCondition);
  207. }
  208. U_CAPI int32_t U_EXPORT2
  209. uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) {
  210. return ((UnicodeSet*) set)->UnicodeSet::spanUTF8(s, length, spanCondition);
  211. }
  212. U_CAPI int32_t U_EXPORT2
  213. uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) {
  214. return ((UnicodeSet*) set)->UnicodeSet::spanBackUTF8(s, length, spanCondition);
  215. }
  216. U_CAPI UBool U_EXPORT2
  217. uset_equals(const USet* set1, const USet* set2) {
  218. return *(const UnicodeSet*)set1 == *(const UnicodeSet*)set2;
  219. }
  220. U_CAPI int32_t U_EXPORT2
  221. uset_indexOf(const USet* set, UChar32 c) {
  222. return ((UnicodeSet*) set)->UnicodeSet::indexOf(c);
  223. }
  224. U_CAPI UChar32 U_EXPORT2
  225. uset_charAt(const USet* set, int32_t index) {
  226. return ((UnicodeSet*) set)->UnicodeSet::charAt(index);
  227. }
  228. U_CAPI int32_t U_EXPORT2
  229. uset_size(const USet* set) {
  230. return ((const UnicodeSet*) set)->UnicodeSet::size();
  231. }
  232. U_NAMESPACE_BEGIN
  233. /**
  234. * This class only exists to provide access to the UnicodeSet private
  235. * USet support API. Declaring a class a friend is more portable than
  236. * trying to declare extern "C" functions as friends.
  237. */
  238. class USetAccess /* not : public UObject because all methods are static */ {
  239. public:
  240. /* Try to have the compiler inline these*/
  241. inline static int32_t getStringCount(const UnicodeSet& set) {
  242. return set.stringsSize();
  243. }
  244. inline static const UnicodeString* getString(const UnicodeSet& set,
  245. int32_t i) {
  246. return set.getString(i);
  247. }
  248. private:
  249. /* do not instantiate*/
  250. USetAccess();
  251. };
  252. U_NAMESPACE_END
  253. U_CAPI int32_t U_EXPORT2
  254. uset_getRangeCount(const USet *set) {
  255. return ((const UnicodeSet *)set)->UnicodeSet::getRangeCount();
  256. }
  257. U_CAPI int32_t U_EXPORT2
  258. uset_getItemCount(const USet* uset) {
  259. const UnicodeSet& set = *(const UnicodeSet*)uset;
  260. return set.getRangeCount() + USetAccess::getStringCount(set);
  261. }
  262. U_CAPI int32_t U_EXPORT2
  263. uset_getItem(const USet* uset, int32_t itemIndex,
  264. UChar32* start, UChar32* end,
  265. char16_t* str, int32_t strCapacity,
  266. UErrorCode* ec) {
  267. if (U_FAILURE(*ec)) return 0;
  268. const UnicodeSet& set = *(const UnicodeSet*)uset;
  269. int32_t rangeCount;
  270. if (itemIndex < 0) {
  271. *ec = U_ILLEGAL_ARGUMENT_ERROR;
  272. return -1;
  273. } else if (itemIndex < (rangeCount = set.getRangeCount())) {
  274. *start = set.getRangeStart(itemIndex);
  275. *end = set.getRangeEnd(itemIndex);
  276. return 0;
  277. } else {
  278. itemIndex -= rangeCount;
  279. if (itemIndex < USetAccess::getStringCount(set)) {
  280. const UnicodeString* s = USetAccess::getString(set, itemIndex);
  281. return s->extract(str, strCapacity, *ec);
  282. } else {
  283. *ec = U_INDEX_OUTOFBOUNDS_ERROR;
  284. return -1;
  285. }
  286. }
  287. }
  288. //U_CAPI UBool U_EXPORT2
  289. //uset_getRange(const USet* set, int32_t rangeIndex,
  290. // UChar32* pStart, UChar32* pEnd) {
  291. // if ((uint32_t) rangeIndex >= (uint32_t) uset_getRangeCount(set)) {
  292. // return false;
  293. // }
  294. // const UnicodeSet* us = (const UnicodeSet*) set;
  295. // *pStart = us->getRangeStart(rangeIndex);
  296. // *pEnd = us->getRangeEnd(rangeIndex);
  297. // return true;
  298. //}
  299. /*
  300. * Serialize a USet into 16-bit units.
  301. * Store BMP code points as themselves with one 16-bit unit each.
  302. *
  303. * Important: the code points in the array are in ascending order,
  304. * therefore all BMP code points precede all supplementary code points.
  305. *
  306. * Store each supplementary code point in 2 16-bit units,
  307. * simply with higher-then-lower 16-bit halves.
  308. *
  309. * Precede the entire list with the length.
  310. * If there are supplementary code points, then set bit 15 in the length
  311. * and add the bmpLength between it and the array.
  312. *
  313. * In other words:
  314. * - all BMP: (length=bmpLength) BMP, .., BMP
  315. * - some supplementary: (length|0x8000) (bmpLength<length) BMP, .., BMP, supp-high, supp-low, ..
  316. */
  317. U_CAPI int32_t U_EXPORT2
  318. uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* ec) {
  319. if (ec==nullptr || U_FAILURE(*ec)) {
  320. return 0;
  321. }
  322. return ((const UnicodeSet*) set)->UnicodeSet::serialize(dest, destCapacity,* ec);
  323. }
  324. U_CAPI UBool U_EXPORT2
  325. uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength) {
  326. int32_t length;
  327. if(fillSet==nullptr) {
  328. return false;
  329. }
  330. if(src==nullptr || srcLength<=0) {
  331. fillSet->length=fillSet->bmpLength=0;
  332. return false;
  333. }
  334. length=*src++;
  335. if(length&0x8000) {
  336. /* there are supplementary values */
  337. length&=0x7fff;
  338. if(srcLength<(2+length)) {
  339. fillSet->length=fillSet->bmpLength=0;
  340. return false;
  341. }
  342. fillSet->bmpLength=*src++;
  343. } else {
  344. /* only BMP values */
  345. if(srcLength<(1+length)) {
  346. fillSet->length=fillSet->bmpLength=0;
  347. return false;
  348. }
  349. fillSet->bmpLength=length;
  350. }
  351. fillSet->array=src;
  352. fillSet->length=length;
  353. return true;
  354. }
  355. U_CAPI void U_EXPORT2
  356. uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c) {
  357. if(fillSet==nullptr || (uint32_t)c>0x10ffff) {
  358. return;
  359. }
  360. fillSet->array=fillSet->staticArray;
  361. if(c<0xffff) {
  362. fillSet->bmpLength=fillSet->length=2;
  363. fillSet->staticArray[0]=(uint16_t)c;
  364. fillSet->staticArray[1]=(uint16_t)c+1;
  365. } else if(c==0xffff) {
  366. fillSet->bmpLength=1;
  367. fillSet->length=3;
  368. fillSet->staticArray[0]=0xffff;
  369. fillSet->staticArray[1]=1;
  370. fillSet->staticArray[2]=0;
  371. } else if(c<0x10ffff) {
  372. fillSet->bmpLength=0;
  373. fillSet->length=4;
  374. fillSet->staticArray[0]=(uint16_t)(c>>16);
  375. fillSet->staticArray[1]=(uint16_t)c;
  376. ++c;
  377. fillSet->staticArray[2]=(uint16_t)(c>>16);
  378. fillSet->staticArray[3]=(uint16_t)c;
  379. } else /* c==0x10ffff */ {
  380. fillSet->bmpLength=0;
  381. fillSet->length=2;
  382. fillSet->staticArray[0]=0x10;
  383. fillSet->staticArray[1]=0xffff;
  384. }
  385. }
  386. U_CAPI UBool U_EXPORT2
  387. uset_serializedContains(const USerializedSet* set, UChar32 c) {
  388. const uint16_t* array;
  389. if(set==nullptr || (uint32_t)c>0x10ffff) {
  390. return false;
  391. }
  392. array=set->array;
  393. if(c<=0xffff) {
  394. /* find c in the BMP part */
  395. int32_t lo = 0;
  396. int32_t hi = set->bmpLength-1;
  397. if (c < array[0]) {
  398. hi = 0;
  399. } else if (c < array[hi]) {
  400. for(;;) {
  401. int32_t i = (lo + hi) >> 1;
  402. if (i == lo) {
  403. break; // Done!
  404. } else if (c < array[i]) {
  405. hi = i;
  406. } else {
  407. lo = i;
  408. }
  409. }
  410. } else {
  411. hi += 1;
  412. }
  413. return (UBool)(hi&1);
  414. } else {
  415. /* find c in the supplementary part */
  416. uint16_t high=(uint16_t)(c>>16), low=(uint16_t)c;
  417. int32_t base = set->bmpLength;
  418. int32_t lo = 0;
  419. int32_t hi = set->length - 2 - base;
  420. if (high < array[base] || (high==array[base] && low<array[base+1])) {
  421. hi = 0;
  422. } else if (high < array[base+hi] || (high==array[base+hi] && low<array[base+hi+1])) {
  423. for (;;) {
  424. int32_t i = ((lo + hi) >> 1) & ~1; // Guarantee even result
  425. int32_t iabs = i + base;
  426. if (i == lo) {
  427. break; // Done!
  428. } else if (high < array[iabs] || (high==array[iabs] && low<array[iabs+1])) {
  429. hi = i;
  430. } else {
  431. lo = i;
  432. }
  433. }
  434. } else {
  435. hi += 2;
  436. }
  437. /* count pairs of 16-bit units even per BMP and check if the number of pairs is odd */
  438. return (UBool)(((hi+(base<<1))&2)!=0);
  439. }
  440. }
  441. U_CAPI int32_t U_EXPORT2
  442. uset_getSerializedRangeCount(const USerializedSet* set) {
  443. if(set==nullptr) {
  444. return 0;
  445. }
  446. return (set->bmpLength+(set->length-set->bmpLength)/2+1)/2;
  447. }
  448. U_CAPI UBool U_EXPORT2
  449. uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
  450. UChar32* pStart, UChar32* pEnd) {
  451. const uint16_t* array;
  452. int32_t bmpLength, length;
  453. if(set==nullptr || rangeIndex<0 || pStart==nullptr || pEnd==nullptr) {
  454. return false;
  455. }
  456. array=set->array;
  457. length=set->length;
  458. bmpLength=set->bmpLength;
  459. rangeIndex*=2; /* address start/limit pairs */
  460. if(rangeIndex<bmpLength) {
  461. *pStart=array[rangeIndex++];
  462. if(rangeIndex<bmpLength) {
  463. *pEnd=array[rangeIndex]-1;
  464. } else if(rangeIndex<length) {
  465. *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1;
  466. } else {
  467. *pEnd=0x10ffff;
  468. }
  469. return true;
  470. } else {
  471. rangeIndex-=bmpLength;
  472. rangeIndex*=2; /* address pairs of pairs of units */
  473. length-=bmpLength;
  474. if(rangeIndex<length) {
  475. array+=bmpLength;
  476. *pStart=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
  477. rangeIndex+=2;
  478. if(rangeIndex<length) {
  479. *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1;
  480. } else {
  481. *pEnd=0x10ffff;
  482. }
  483. return true;
  484. } else {
  485. return false;
  486. }
  487. }
  488. }
  489. // TODO The old, internal uset.c had an efficient uset_containsOne function.
  490. // Returned the one and only code point, or else -1 or something.
  491. // Consider adding such a function to both C and C++ UnicodeSet/uset.
  492. // See tools/gennorm/store.c for usage, now usetContainsOne there.
  493. // TODO Investigate incorporating this code into UnicodeSet to improve
  494. // efficiency.
  495. // ---
  496. // #define USET_GROW_DELTA 20
  497. //
  498. // static int32_t
  499. // findChar(const UChar32* array, int32_t length, UChar32 c) {
  500. // int32_t i;
  501. //
  502. // /* check the last range limit first for more efficient appending */
  503. // if(length>0) {
  504. // if(c>=array[length-1]) {
  505. // return length;
  506. // }
  507. //
  508. // /* do not check the last range limit again in the loop below */
  509. // --length;
  510. // }
  511. //
  512. // for(i=0; i<length && c>=array[i]; ++i) {}
  513. // return i;
  514. // }
  515. //
  516. // static UBool
  517. // addRemove(USet* set, UChar32 c, int32_t doRemove) {
  518. // int32_t i, length, more;
  519. //
  520. // if(set==nullptr || (uint32_t)c>0x10ffff) {
  521. // return false;
  522. // }
  523. //
  524. // length=set->length;
  525. // i=findChar(set->array, length, c);
  526. // if((i&1)^doRemove) {
  527. // /* c is already in the set */
  528. // return true;
  529. // }
  530. //
  531. // /* how many more array items do we need? */
  532. // if(i<length && (c+1)==set->array[i]) {
  533. // /* c is just before the following range, extend that in-place by one */
  534. // set->array[i]=c;
  535. // if(i>0) {
  536. // --i;
  537. // if(c==set->array[i]) {
  538. // /* the previous range collapsed, remove it */
  539. // set->length=length-=2;
  540. // if(i<length) {
  541. // uprv_memmove(set->array+i, set->array+i+2, (length-i)*4);
  542. // }
  543. // }
  544. // }
  545. // return true;
  546. // } else if(i>0 && c==set->array[i-1]) {
  547. // /* c is just after the previous range, extend that in-place by one */
  548. // if(++c<=0x10ffff) {
  549. // set->array[i-1]=c;
  550. // if(i<length && c==set->array[i]) {
  551. // /* the following range collapsed, remove it */
  552. // --i;
  553. // set->length=length-=2;
  554. // if(i<length) {
  555. // uprv_memmove(set->array+i, set->array+i+2, (length-i)*4);
  556. // }
  557. // }
  558. // } else {
  559. // /* extend the previous range (had limit 0x10ffff) to the end of Unicode */
  560. // set->length=i-1;
  561. // }
  562. // return true;
  563. // } else if(i==length && c==0x10ffff) {
  564. // /* insert one range limit c */
  565. // more=1;
  566. // } else {
  567. // /* insert two range limits c, c+1 */
  568. // more=2;
  569. // }
  570. //
  571. // /* insert <more> range limits */
  572. // if(length+more>set->capacity) {
  573. // /* reallocate */
  574. // int32_t newCapacity=set->capacity+set->capacity/2+USET_GROW_DELTA;
  575. // UChar32* newArray=(UChar32* )uprv_malloc(newCapacity*4);
  576. // if(newArray==nullptr) {
  577. // return false;
  578. // }
  579. // set->capacity=newCapacity;
  580. // uprv_memcpy(newArray, set->array, length*4);
  581. //
  582. // if(set->array!=set->staticBuffer) {
  583. // uprv_free(set->array);
  584. // }
  585. // set->array=newArray;
  586. // }
  587. //
  588. // if(i<length) {
  589. // uprv_memmove(set->array+i+more, set->array+i, (length-i)*4);
  590. // }
  591. // set->array[i]=c;
  592. // if(more==2) {
  593. // set->array[i+1]=c+1;
  594. // }
  595. // set->length+=more;
  596. //
  597. // return true;
  598. // }
  599. //
  600. // U_CAPI UBool U_EXPORT2
  601. // uset_add(USet* set, UChar32 c) {
  602. // return addRemove(set, c, 0);
  603. // }
  604. //
  605. // U_CAPI void U_EXPORT2
  606. // uset_remove(USet* set, UChar32 c) {
  607. // addRemove(set, c, 1);
  608. // }