uset.cpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. *
  6. * Copyright (C) 2002-2011, International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. *******************************************************************************
  10. * file name: uset.cpp
  11. * encoding: UTF-8
  12. * tab size: 8 (not used)
  13. * indentation:4
  14. *
  15. * created on: 2002mar07
  16. * created by: Markus W. Scherer
  17. *
  18. * There are functions to efficiently serialize a USet into an array of uint16_t
  19. * and functions to use such a serialized form efficiently without
  20. * instantiating a new USet.
  21. */
  22. #include "unicode/utypes.h"
  23. #include "unicode/char16ptr.h"
  24. #include "unicode/uobject.h"
  25. #include "unicode/uset.h"
  26. #include "unicode/uniset.h"
  27. #include "cmemory.h"
  28. #include "unicode/ustring.h"
  29. #include "unicode/parsepos.h"
  30. U_NAMESPACE_USE
  31. U_CAPI USet* U_EXPORT2
  32. uset_openEmpty() {
  33. return (USet*) new UnicodeSet();
  34. }
  35. U_CAPI USet* U_EXPORT2
  36. uset_open(UChar32 start, UChar32 end) {
  37. return (USet*) new UnicodeSet(start, end);
  38. }
  39. U_CAPI void U_EXPORT2
  40. uset_close(USet* set) {
  41. delete (UnicodeSet*) set;
  42. }
  43. U_CAPI USet * U_EXPORT2
  44. uset_clone(const USet *set) {
  45. return (USet*) (((UnicodeSet*) set)->UnicodeSet::clone());
  46. }
  47. U_CAPI UBool U_EXPORT2
  48. uset_isFrozen(const USet *set) {
  49. return ((UnicodeSet*) set)->UnicodeSet::isFrozen();
  50. }
  51. U_CAPI void U_EXPORT2
  52. uset_freeze(USet *set) {
  53. ((UnicodeSet*) set)->UnicodeSet::freeze();
  54. }
  55. U_CAPI USet * U_EXPORT2
  56. uset_cloneAsThawed(const USet *set) {
  57. return (USet*) (((UnicodeSet*) set)->UnicodeSet::cloneAsThawed());
  58. }
  59. U_CAPI void U_EXPORT2
  60. uset_set(USet* set,
  61. UChar32 start, UChar32 end) {
  62. ((UnicodeSet*) set)->UnicodeSet::set(start, end);
  63. }
  64. U_CAPI void U_EXPORT2
  65. uset_addAll(USet* set, const USet *additionalSet) {
  66. ((UnicodeSet*) set)->UnicodeSet::addAll(*((const UnicodeSet*)additionalSet));
  67. }
  68. U_CAPI void U_EXPORT2
  69. uset_add(USet* set, UChar32 c) {
  70. ((UnicodeSet*) set)->UnicodeSet::add(c);
  71. }
  72. U_CAPI void U_EXPORT2
  73. uset_addRange(USet* set, UChar32 start, UChar32 end) {
  74. ((UnicodeSet*) set)->UnicodeSet::add(start, end);
  75. }
  76. U_CAPI void U_EXPORT2
  77. uset_addString(USet* set, const char16_t* str, int32_t strLen) {
  78. // UnicodeString handles -1 for strLen
  79. UnicodeString s(strLen<0, str, strLen);
  80. ((UnicodeSet*) set)->UnicodeSet::add(s);
  81. }
  82. U_CAPI void U_EXPORT2
  83. uset_addAllCodePoints(USet* set, const char16_t *str, int32_t strLen) {
  84. // UnicodeString handles -1 for strLen
  85. UnicodeString s(str, strLen);
  86. ((UnicodeSet*) set)->UnicodeSet::addAll(s);
  87. }
  88. U_CAPI void U_EXPORT2
  89. uset_remove(USet* set, UChar32 c) {
  90. ((UnicodeSet*) set)->UnicodeSet::remove(c);
  91. }
  92. U_CAPI void U_EXPORT2
  93. uset_removeRange(USet* set, UChar32 start, UChar32 end) {
  94. ((UnicodeSet*) set)->UnicodeSet::remove(start, end);
  95. }
  96. U_CAPI void U_EXPORT2
  97. uset_removeString(USet* set, const char16_t* str, int32_t strLen) {
  98. UnicodeString s(strLen==-1, str, strLen);
  99. ((UnicodeSet*) set)->UnicodeSet::remove(s);
  100. }
  101. U_CAPI void U_EXPORT2
  102. uset_removeAllCodePoints(USet *set, const char16_t *str, int32_t length) {
  103. UnicodeString s(length==-1, str, length);
  104. ((UnicodeSet*) set)->UnicodeSet::removeAll(s);
  105. }
  106. U_CAPI void U_EXPORT2
  107. uset_removeAll(USet* set, const USet* remove) {
  108. ((UnicodeSet*) set)->UnicodeSet::removeAll(*(const UnicodeSet*)remove);
  109. }
  110. U_CAPI void U_EXPORT2
  111. uset_retain(USet* set, UChar32 start, UChar32 end) {
  112. ((UnicodeSet*) set)->UnicodeSet::retain(start, end);
  113. }
  114. U_CAPI void U_EXPORT2
  115. uset_retainString(USet *set, const char16_t *str, int32_t length) {
  116. UnicodeString s(length==-1, str, length);
  117. ((UnicodeSet*) set)->UnicodeSet::retain(s);
  118. }
  119. U_CAPI void U_EXPORT2
  120. uset_retainAllCodePoints(USet *set, const char16_t *str, int32_t length) {
  121. UnicodeString s(length==-1, str, length);
  122. ((UnicodeSet*) set)->UnicodeSet::retainAll(s);
  123. }
  124. U_CAPI void U_EXPORT2
  125. uset_retainAll(USet* set, const USet* retain) {
  126. ((UnicodeSet*) set)->UnicodeSet::retainAll(*(const UnicodeSet*)retain);
  127. }
  128. U_CAPI void U_EXPORT2
  129. uset_compact(USet* set) {
  130. ((UnicodeSet*) set)->UnicodeSet::compact();
  131. }
  132. U_CAPI void U_EXPORT2
  133. uset_complement(USet* set) {
  134. ((UnicodeSet*) set)->UnicodeSet::complement();
  135. }
  136. U_CAPI void U_EXPORT2
  137. uset_complementRange(USet *set, UChar32 start, UChar32 end) {
  138. ((UnicodeSet*) set)->UnicodeSet::complement(start, end);
  139. }
  140. U_CAPI void U_EXPORT2
  141. uset_complementString(USet *set, const char16_t *str, int32_t length) {
  142. UnicodeString s(length==-1, str, length);
  143. ((UnicodeSet*) set)->UnicodeSet::complement(s);
  144. }
  145. U_CAPI void U_EXPORT2
  146. uset_complementAllCodePoints(USet *set, const char16_t *str, int32_t length) {
  147. UnicodeString s(length==-1, str, length);
  148. ((UnicodeSet*) set)->UnicodeSet::complementAll(s);
  149. }
  150. U_CAPI void U_EXPORT2
  151. uset_complementAll(USet* set, const USet* complement) {
  152. ((UnicodeSet*) set)->UnicodeSet::complementAll(*(const UnicodeSet*)complement);
  153. }
  154. U_CAPI void U_EXPORT2
  155. uset_clear(USet* set) {
  156. ((UnicodeSet*) set)->UnicodeSet::clear();
  157. }
  158. U_CAPI void U_EXPORT2
  159. uset_removeAllStrings(USet* set) {
  160. ((UnicodeSet*) set)->UnicodeSet::removeAllStrings();
  161. }
  162. U_CAPI UBool U_EXPORT2
  163. uset_isEmpty(const USet* set) {
  164. return ((const UnicodeSet*) set)->UnicodeSet::isEmpty();
  165. }
  166. U_CAPI UBool U_EXPORT2
  167. uset_hasStrings(const USet* set) {
  168. return ((const UnicodeSet*) set)->UnicodeSet::hasStrings();
  169. }
  170. U_CAPI UBool U_EXPORT2
  171. uset_contains(const USet* set, UChar32 c) {
  172. return ((const UnicodeSet*) set)->UnicodeSet::contains(c);
  173. }
  174. U_CAPI UBool U_EXPORT2
  175. uset_containsRange(const USet* set, UChar32 start, UChar32 end) {
  176. return ((const UnicodeSet*) set)->UnicodeSet::contains(start, end);
  177. }
  178. U_CAPI UBool U_EXPORT2
  179. uset_containsString(const USet* set, const char16_t* str, int32_t strLen) {
  180. UnicodeString s(strLen==-1, str, strLen);
  181. return ((const UnicodeSet*) set)->UnicodeSet::contains(s);
  182. }
  183. U_CAPI UBool U_EXPORT2
  184. uset_containsAll(const USet* set1, const USet* set2) {
  185. return ((const UnicodeSet*) set1)->UnicodeSet::containsAll(* (const UnicodeSet*) set2);
  186. }
  187. U_CAPI UBool U_EXPORT2
  188. uset_containsAllCodePoints(const USet* set, const char16_t *str, int32_t strLen) {
  189. // Create a string alias, since nothing is being added to the set.
  190. UnicodeString s(strLen==-1, str, strLen);
  191. return ((const UnicodeSet*) set)->UnicodeSet::containsAll(s);
  192. }
  193. U_CAPI UBool U_EXPORT2
  194. uset_containsNone(const USet* set1, const USet* set2) {
  195. return ((const UnicodeSet*) set1)->UnicodeSet::containsNone(* (const UnicodeSet*) set2);
  196. }
  197. U_CAPI UBool U_EXPORT2
  198. uset_containsSome(const USet* set1, const USet* set2) {
  199. return ((const UnicodeSet*) set1)->UnicodeSet::containsSome(* (const UnicodeSet*) set2);
  200. }
  201. U_CAPI int32_t U_EXPORT2
  202. uset_span(const USet *set, const char16_t *s, int32_t length, USetSpanCondition spanCondition) {
  203. return ((UnicodeSet*) set)->UnicodeSet::span(s, length, spanCondition);
  204. }
  205. U_CAPI int32_t U_EXPORT2
  206. uset_spanBack(const USet *set, const char16_t *s, int32_t length, USetSpanCondition spanCondition) {
  207. return ((UnicodeSet*) set)->UnicodeSet::spanBack(s, length, spanCondition);
  208. }
  209. U_CAPI int32_t U_EXPORT2
  210. uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) {
  211. return ((UnicodeSet*) set)->UnicodeSet::spanUTF8(s, length, spanCondition);
  212. }
  213. U_CAPI int32_t U_EXPORT2
  214. uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) {
  215. return ((UnicodeSet*) set)->UnicodeSet::spanBackUTF8(s, length, spanCondition);
  216. }
  217. U_CAPI UBool U_EXPORT2
  218. uset_equals(const USet* set1, const USet* set2) {
  219. return *(const UnicodeSet*)set1 == *(const UnicodeSet*)set2;
  220. }
  221. U_CAPI int32_t U_EXPORT2
  222. uset_indexOf(const USet* set, UChar32 c) {
  223. return ((UnicodeSet*) set)->UnicodeSet::indexOf(c);
  224. }
  225. U_CAPI UChar32 U_EXPORT2
  226. uset_charAt(const USet* set, int32_t index) {
  227. return ((UnicodeSet*) set)->UnicodeSet::charAt(index);
  228. }
  229. U_CAPI int32_t U_EXPORT2
  230. uset_size(const USet* set) {
  231. return ((const UnicodeSet*) set)->UnicodeSet::size();
  232. }
  233. U_NAMESPACE_BEGIN
  234. /**
  235. * This class only exists to provide access to the UnicodeSet private
  236. * USet support API. Declaring a class a friend is more portable than
  237. * trying to declare extern "C" functions as friends.
  238. */
  239. class USetAccess /* not : public UObject because all methods are static */ {
  240. public:
  241. /* Try to have the compiler inline these*/
  242. inline static int32_t getStringCount(const UnicodeSet& set) {
  243. return set.stringsSize();
  244. }
  245. inline static const UnicodeString* getString(const UnicodeSet& set,
  246. int32_t i) {
  247. return set.getString(i);
  248. }
  249. private:
  250. /* do not instantiate*/
  251. USetAccess();
  252. };
  253. U_NAMESPACE_END
  254. U_CAPI int32_t U_EXPORT2
  255. uset_getRangeCount(const USet *set) {
  256. return ((const UnicodeSet *)set)->UnicodeSet::getRangeCount();
  257. }
  258. U_CAPI int32_t U_EXPORT2
  259. uset_getStringCount(const USet *uset) {
  260. const UnicodeSet &set = *(const UnicodeSet *)uset;
  261. return USetAccess::getStringCount(set);
  262. }
  263. U_CAPI int32_t U_EXPORT2
  264. uset_getItemCount(const USet* uset) {
  265. const UnicodeSet& set = *(const UnicodeSet*)uset;
  266. return set.getRangeCount() + USetAccess::getStringCount(set);
  267. }
  268. U_CAPI const UChar* U_EXPORT2
  269. uset_getString(const USet *uset, int32_t index, int32_t *pLength) {
  270. if (pLength == nullptr) { return nullptr; }
  271. const UnicodeSet &set = *(const UnicodeSet *)uset;
  272. int32_t count = USetAccess::getStringCount(set);
  273. if (index < 0 || count <= index) {
  274. *pLength = 0;
  275. return nullptr;
  276. }
  277. const UnicodeString *s = USetAccess::getString(set, index);
  278. *pLength = s->length();
  279. return toUCharPtr(s->getBuffer());
  280. }
  281. U_CAPI int32_t U_EXPORT2
  282. uset_getItem(const USet* uset, int32_t itemIndex,
  283. UChar32* start, UChar32* end,
  284. char16_t* str, int32_t strCapacity,
  285. UErrorCode* ec) {
  286. if (U_FAILURE(*ec)) return 0;
  287. const UnicodeSet& set = *(const UnicodeSet*)uset;
  288. int32_t rangeCount;
  289. if (itemIndex < 0) {
  290. *ec = U_ILLEGAL_ARGUMENT_ERROR;
  291. return -1;
  292. } else if (itemIndex < (rangeCount = set.getRangeCount())) {
  293. *start = set.getRangeStart(itemIndex);
  294. *end = set.getRangeEnd(itemIndex);
  295. return 0;
  296. } else {
  297. itemIndex -= rangeCount;
  298. if (itemIndex < USetAccess::getStringCount(set)) {
  299. const UnicodeString* s = USetAccess::getString(set, itemIndex);
  300. return s->extract(str, strCapacity, *ec);
  301. } else {
  302. *ec = U_INDEX_OUTOFBOUNDS_ERROR;
  303. return -1;
  304. }
  305. }
  306. }
  307. //U_CAPI UBool U_EXPORT2
  308. //uset_getRange(const USet* set, int32_t rangeIndex,
  309. // UChar32* pStart, UChar32* pEnd) {
  310. // if ((uint32_t) rangeIndex >= (uint32_t) uset_getRangeCount(set)) {
  311. // return false;
  312. // }
  313. // const UnicodeSet* us = (const UnicodeSet*) set;
  314. // *pStart = us->getRangeStart(rangeIndex);
  315. // *pEnd = us->getRangeEnd(rangeIndex);
  316. // return true;
  317. //}
  318. /*
  319. * Serialize a USet into 16-bit units.
  320. * Store BMP code points as themselves with one 16-bit unit each.
  321. *
  322. * Important: the code points in the array are in ascending order,
  323. * therefore all BMP code points precede all supplementary code points.
  324. *
  325. * Store each supplementary code point in 2 16-bit units,
  326. * simply with higher-then-lower 16-bit halves.
  327. *
  328. * Precede the entire list with the length.
  329. * If there are supplementary code points, then set bit 15 in the length
  330. * and add the bmpLength between it and the array.
  331. *
  332. * In other words:
  333. * - all BMP: (length=bmpLength) BMP, .., BMP
  334. * - some supplementary: (length|0x8000) (bmpLength<length) BMP, .., BMP, supp-high, supp-low, ..
  335. */
  336. U_CAPI int32_t U_EXPORT2
  337. uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* ec) {
  338. if (ec==nullptr || U_FAILURE(*ec)) {
  339. return 0;
  340. }
  341. return ((const UnicodeSet*) set)->UnicodeSet::serialize(dest, destCapacity,* ec);
  342. }
  343. U_CAPI UBool U_EXPORT2
  344. uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength) {
  345. int32_t length;
  346. if(fillSet==nullptr) {
  347. return false;
  348. }
  349. if(src==nullptr || srcLength<=0) {
  350. fillSet->length=fillSet->bmpLength=0;
  351. return false;
  352. }
  353. length=*src++;
  354. if(length&0x8000) {
  355. /* there are supplementary values */
  356. length&=0x7fff;
  357. if(srcLength<(2+length)) {
  358. fillSet->length=fillSet->bmpLength=0;
  359. return false;
  360. }
  361. fillSet->bmpLength=*src++;
  362. } else {
  363. /* only BMP values */
  364. if(srcLength<(1+length)) {
  365. fillSet->length=fillSet->bmpLength=0;
  366. return false;
  367. }
  368. fillSet->bmpLength=length;
  369. }
  370. fillSet->array=src;
  371. fillSet->length=length;
  372. return true;
  373. }
  374. U_CAPI void U_EXPORT2
  375. uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c) {
  376. if(fillSet==nullptr || (uint32_t)c>0x10ffff) {
  377. return;
  378. }
  379. fillSet->array=fillSet->staticArray;
  380. if(c<0xffff) {
  381. fillSet->bmpLength=fillSet->length=2;
  382. fillSet->staticArray[0]=(uint16_t)c;
  383. fillSet->staticArray[1]=(uint16_t)c+1;
  384. } else if(c==0xffff) {
  385. fillSet->bmpLength=1;
  386. fillSet->length=3;
  387. fillSet->staticArray[0]=0xffff;
  388. fillSet->staticArray[1]=1;
  389. fillSet->staticArray[2]=0;
  390. } else if(c<0x10ffff) {
  391. fillSet->bmpLength=0;
  392. fillSet->length=4;
  393. fillSet->staticArray[0]=(uint16_t)(c>>16);
  394. fillSet->staticArray[1]=(uint16_t)c;
  395. ++c;
  396. fillSet->staticArray[2]=(uint16_t)(c>>16);
  397. fillSet->staticArray[3]=(uint16_t)c;
  398. } else /* c==0x10ffff */ {
  399. fillSet->bmpLength=0;
  400. fillSet->length=2;
  401. fillSet->staticArray[0]=0x10;
  402. fillSet->staticArray[1]=0xffff;
  403. }
  404. }
  405. U_CAPI UBool U_EXPORT2
  406. uset_serializedContains(const USerializedSet* set, UChar32 c) {
  407. const uint16_t* array;
  408. if(set==nullptr || (uint32_t)c>0x10ffff) {
  409. return false;
  410. }
  411. array=set->array;
  412. if(c<=0xffff) {
  413. /* find c in the BMP part */
  414. int32_t lo = 0;
  415. int32_t hi = set->bmpLength-1;
  416. if (c < array[0]) {
  417. hi = 0;
  418. } else if (c < array[hi]) {
  419. for(;;) {
  420. int32_t i = (lo + hi) >> 1;
  421. if (i == lo) {
  422. break; // Done!
  423. } else if (c < array[i]) {
  424. hi = i;
  425. } else {
  426. lo = i;
  427. }
  428. }
  429. } else {
  430. hi += 1;
  431. }
  432. return hi&1;
  433. } else {
  434. /* find c in the supplementary part */
  435. uint16_t high=(uint16_t)(c>>16), low=(uint16_t)c;
  436. int32_t base = set->bmpLength;
  437. int32_t lo = 0;
  438. int32_t hi = set->length - 2 - base;
  439. if (high < array[base] || (high==array[base] && low<array[base+1])) {
  440. hi = 0;
  441. } else if (high < array[base+hi] || (high==array[base+hi] && low<array[base+hi+1])) {
  442. for (;;) {
  443. int32_t i = ((lo + hi) >> 1) & ~1; // Guarantee even result
  444. int32_t iabs = i + base;
  445. if (i == lo) {
  446. break; // Done!
  447. } else if (high < array[iabs] || (high==array[iabs] && low<array[iabs+1])) {
  448. hi = i;
  449. } else {
  450. lo = i;
  451. }
  452. }
  453. } else {
  454. hi += 2;
  455. }
  456. /* count pairs of 16-bit units even per BMP and check if the number of pairs is odd */
  457. return ((hi+(base<<1))&2)!=0;
  458. }
  459. }
  460. U_CAPI int32_t U_EXPORT2
  461. uset_getSerializedRangeCount(const USerializedSet* set) {
  462. if(set==nullptr) {
  463. return 0;
  464. }
  465. return (set->bmpLength+(set->length-set->bmpLength)/2+1)/2;
  466. }
  467. U_CAPI UBool U_EXPORT2
  468. uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
  469. UChar32* pStart, UChar32* pEnd) {
  470. const uint16_t* array;
  471. int32_t bmpLength, length;
  472. if(set==nullptr || rangeIndex<0 || pStart==nullptr || pEnd==nullptr) {
  473. return false;
  474. }
  475. array=set->array;
  476. length=set->length;
  477. bmpLength=set->bmpLength;
  478. rangeIndex*=2; /* address start/limit pairs */
  479. if(rangeIndex<bmpLength) {
  480. *pStart=array[rangeIndex++];
  481. if(rangeIndex<bmpLength) {
  482. *pEnd=array[rangeIndex]-1;
  483. } else if(rangeIndex<length) {
  484. *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1;
  485. } else {
  486. *pEnd=0x10ffff;
  487. }
  488. return true;
  489. } else {
  490. rangeIndex-=bmpLength;
  491. rangeIndex*=2; /* address pairs of pairs of units */
  492. length-=bmpLength;
  493. if(rangeIndex<length) {
  494. array+=bmpLength;
  495. *pStart=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
  496. rangeIndex+=2;
  497. if(rangeIndex<length) {
  498. *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1;
  499. } else {
  500. *pEnd=0x10ffff;
  501. }
  502. return true;
  503. } else {
  504. return false;
  505. }
  506. }
  507. }
  508. // TODO The old, internal uset.c had an efficient uset_containsOne function.
  509. // Returned the one and only code point, or else -1 or something.
  510. // Consider adding such a function to both C and C++ UnicodeSet/uset.
  511. // See tools/gennorm/store.c for usage, now usetContainsOne there.
  512. // TODO Investigate incorporating this code into UnicodeSet to improve
  513. // efficiency.
  514. // ---
  515. // #define USET_GROW_DELTA 20
  516. //
  517. // static int32_t
  518. // findChar(const UChar32* array, int32_t length, UChar32 c) {
  519. // int32_t i;
  520. //
  521. // /* check the last range limit first for more efficient appending */
  522. // if(length>0) {
  523. // if(c>=array[length-1]) {
  524. // return length;
  525. // }
  526. //
  527. // /* do not check the last range limit again in the loop below */
  528. // --length;
  529. // }
  530. //
  531. // for(i=0; i<length && c>=array[i]; ++i) {}
  532. // return i;
  533. // }
  534. //
  535. // static UBool
  536. // addRemove(USet* set, UChar32 c, int32_t doRemove) {
  537. // int32_t i, length, more;
  538. //
  539. // if(set==nullptr || (uint32_t)c>0x10ffff) {
  540. // return false;
  541. // }
  542. //
  543. // length=set->length;
  544. // i=findChar(set->array, length, c);
  545. // if((i&1)^doRemove) {
  546. // /* c is already in the set */
  547. // return true;
  548. // }
  549. //
  550. // /* how many more array items do we need? */
  551. // if(i<length && (c+1)==set->array[i]) {
  552. // /* c is just before the following range, extend that in-place by one */
  553. // set->array[i]=c;
  554. // if(i>0) {
  555. // --i;
  556. // if(c==set->array[i]) {
  557. // /* the previous range collapsed, remove it */
  558. // set->length=length-=2;
  559. // if(i<length) {
  560. // uprv_memmove(set->array+i, set->array+i+2, (length-i)*4);
  561. // }
  562. // }
  563. // }
  564. // return true;
  565. // } else if(i>0 && c==set->array[i-1]) {
  566. // /* c is just after the previous range, extend that in-place by one */
  567. // if(++c<=0x10ffff) {
  568. // set->array[i-1]=c;
  569. // if(i<length && c==set->array[i]) {
  570. // /* the following range collapsed, remove it */
  571. // --i;
  572. // set->length=length-=2;
  573. // if(i<length) {
  574. // uprv_memmove(set->array+i, set->array+i+2, (length-i)*4);
  575. // }
  576. // }
  577. // } else {
  578. // /* extend the previous range (had limit 0x10ffff) to the end of Unicode */
  579. // set->length=i-1;
  580. // }
  581. // return true;
  582. // } else if(i==length && c==0x10ffff) {
  583. // /* insert one range limit c */
  584. // more=1;
  585. // } else {
  586. // /* insert two range limits c, c+1 */
  587. // more=2;
  588. // }
  589. //
  590. // /* insert <more> range limits */
  591. // if(length+more>set->capacity) {
  592. // /* reallocate */
  593. // int32_t newCapacity=set->capacity+set->capacity/2+USET_GROW_DELTA;
  594. // UChar32* newArray=(UChar32* )uprv_malloc(newCapacity*4);
  595. // if(newArray==nullptr) {
  596. // return false;
  597. // }
  598. // set->capacity=newCapacity;
  599. // uprv_memcpy(newArray, set->array, length*4);
  600. //
  601. // if(set->array!=set->staticBuffer) {
  602. // uprv_free(set->array);
  603. // }
  604. // set->array=newArray;
  605. // }
  606. //
  607. // if(i<length) {
  608. // uprv_memmove(set->array+i+more, set->array+i, (length-i)*4);
  609. // }
  610. // set->array[i]=c;
  611. // if(more==2) {
  612. // set->array[i+1]=c+1;
  613. // }
  614. // set->length+=more;
  615. //
  616. // return true;
  617. // }
  618. //
  619. // U_CAPI UBool U_EXPORT2
  620. // uset_add(USet* set, UChar32 c) {
  621. // return addRemove(set, c, 0);
  622. // }
  623. //
  624. // U_CAPI void U_EXPORT2
  625. // uset_remove(USet* set, UChar32 c) {
  626. // addRemove(set, c, 1);
  627. // }