uiter.h 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. *
  6. * Copyright (C) 2002-2011 International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. *******************************************************************************
  10. * file name: uiter.h
  11. * encoding: UTF-8
  12. * tab size: 8 (not used)
  13. * indentation:4
  14. *
  15. * created on: 2002jan18
  16. * created by: Markus W. Scherer
  17. */
  18. #ifndef __UITER_H__
  19. #define __UITER_H__
  20. /**
  21. * \file
  22. * \brief C API: Unicode Character Iteration
  23. *
  24. * @see UCharIterator
  25. */
  26. #include "unicode/utypes.h"
  27. #if U_SHOW_CPLUSPLUS_API
  28. U_NAMESPACE_BEGIN
  29. class CharacterIterator;
  30. class Replaceable;
  31. U_NAMESPACE_END
  32. #endif
  33. U_CDECL_BEGIN
  34. struct UCharIterator;
  35. typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
  36. /**
  37. * Origin constants for UCharIterator.getIndex() and UCharIterator.move().
  38. * UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, and UITER_LENGTH name the start, current, limit, 0, and length index, respectively.
  39. * @see UCharIteratorGetIndex, UCharIteratorMove, UCharIterator
  40. * @stable ICU 2.1
  41. */
  42. typedef enum UCharIteratorOrigin {
  43. UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH
  44. } UCharIteratorOrigin;
  45. /** Constants for UCharIterator. @stable ICU 2.6 */
  46. enum {
  47. /**
  48. * Constant value that may be returned by UCharIteratorMove
  49. * indicating that the final UTF-16 index is not known, but that the move succeeded.
  50. * This can occur when moving relative to limit or length, or
  51. * when moving relative to the current index after a setState()
  52. * when the current UTF-16 index is not known.
  53. *
  54. * It would be very inefficient to have to count from the beginning of the text
  55. * just to get the current/limit/length index after moving relative to it.
  56. * The actual index can be determined with getIndex(UITER_CURRENT)
  57. * which will count the UChars if necessary.
  58. *
  59. * @stable ICU 2.6
  60. */
  61. UITER_UNKNOWN_INDEX=-2
  62. };
  63. /**
  64. * Constant for UCharIterator getState() indicating an error or
  65. * an unknown state.
  66. * Returned by uiter_getState()/UCharIteratorGetState
  67. * when an error occurs.
  68. * Also, some UCharIterator implementations may not be able to return
  69. * a valid state for each position. This will be clearly documented
  70. * for each such iterator (none of the public ones here).
  71. *
  72. * @stable ICU 2.6
  73. */
  74. #define UITER_NO_STATE ((uint32_t)0xffffffff)
  75. /**
  76. * Function type declaration for UCharIterator.getIndex().
  77. *
  78. * Gets the current position, or the start or limit of the
  79. * iteration range.
  80. *
  81. * This function may perform slowly for UITER_CURRENT after setState() was called,
  82. * or for UITER_LENGTH, because an iterator implementation may have to count
  83. * UChars if the underlying storage is not UTF-16.
  84. *
  85. * @param iter the UCharIterator structure ("this pointer")
  86. * @param origin get the 0, start, limit, length, or current index
  87. * @return the requested index, or U_SENTINEL in an error condition
  88. *
  89. * @see UCharIteratorOrigin
  90. * @see UCharIterator
  91. * @stable ICU 2.1
  92. */
  93. typedef int32_t U_CALLCONV
  94. UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
  95. /**
  96. * Function type declaration for UCharIterator.move().
  97. *
  98. * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index).
  99. *
  100. * Moves the current position relative to the start or limit of the
  101. * iteration range, or relative to the current position itself.
  102. * The movement is expressed in numbers of code units forward
  103. * or backward by specifying a positive or negative delta.
  104. * Out of bounds movement will be pinned to the start or limit.
  105. *
  106. * This function may perform slowly for moving relative to UITER_LENGTH
  107. * because an iterator implementation may have to count the rest of the
  108. * UChars if the native storage is not UTF-16.
  109. *
  110. * When moving relative to the limit or length, or
  111. * relative to the current position after setState() was called,
  112. * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient
  113. * determination of the actual UTF-16 index.
  114. * The actual index can be determined with getIndex(UITER_CURRENT)
  115. * which will count the UChars if necessary.
  116. * See UITER_UNKNOWN_INDEX for details.
  117. *
  118. * @param iter the UCharIterator structure ("this pointer")
  119. * @param delta can be positive, zero, or negative
  120. * @param origin move relative to the 0, start, limit, length, or current index
  121. * @return the new index, or U_SENTINEL on an error condition,
  122. * or UITER_UNKNOWN_INDEX when the index is not known.
  123. *
  124. * @see UCharIteratorOrigin
  125. * @see UCharIterator
  126. * @see UITER_UNKNOWN_INDEX
  127. * @stable ICU 2.1
  128. */
  129. typedef int32_t U_CALLCONV
  130. UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
  131. /**
  132. * Function type declaration for UCharIterator.hasNext().
  133. *
  134. * Check if current() and next() can still
  135. * return another code unit.
  136. *
  137. * @param iter the UCharIterator structure ("this pointer")
  138. * @return boolean value for whether current() and next() can still return another code unit
  139. *
  140. * @see UCharIterator
  141. * @stable ICU 2.1
  142. */
  143. typedef UBool U_CALLCONV
  144. UCharIteratorHasNext(UCharIterator *iter);
  145. /**
  146. * Function type declaration for UCharIterator.hasPrevious().
  147. *
  148. * Check if previous() can still return another code unit.
  149. *
  150. * @param iter the UCharIterator structure ("this pointer")
  151. * @return boolean value for whether previous() can still return another code unit
  152. *
  153. * @see UCharIterator
  154. * @stable ICU 2.1
  155. */
  156. typedef UBool U_CALLCONV
  157. UCharIteratorHasPrevious(UCharIterator *iter);
  158. /**
  159. * Function type declaration for UCharIterator.current().
  160. *
  161. * Return the code unit at the current position,
  162. * or U_SENTINEL if there is none (index is at the limit).
  163. *
  164. * @param iter the UCharIterator structure ("this pointer")
  165. * @return the current code unit
  166. *
  167. * @see UCharIterator
  168. * @stable ICU 2.1
  169. */
  170. typedef UChar32 U_CALLCONV
  171. UCharIteratorCurrent(UCharIterator *iter);
  172. /**
  173. * Function type declaration for UCharIterator.next().
  174. *
  175. * Return the code unit at the current index and increment
  176. * the index (post-increment, like s[i++]),
  177. * or return U_SENTINEL if there is none (index is at the limit).
  178. *
  179. * @param iter the UCharIterator structure ("this pointer")
  180. * @return the current code unit (and post-increment the current index)
  181. *
  182. * @see UCharIterator
  183. * @stable ICU 2.1
  184. */
  185. typedef UChar32 U_CALLCONV
  186. UCharIteratorNext(UCharIterator *iter);
  187. /**
  188. * Function type declaration for UCharIterator.previous().
  189. *
  190. * Decrement the index and return the code unit from there
  191. * (pre-decrement, like s[--i]),
  192. * or return U_SENTINEL if there is none (index is at the start).
  193. *
  194. * @param iter the UCharIterator structure ("this pointer")
  195. * @return the previous code unit (after pre-decrementing the current index)
  196. *
  197. * @see UCharIterator
  198. * @stable ICU 2.1
  199. */
  200. typedef UChar32 U_CALLCONV
  201. UCharIteratorPrevious(UCharIterator *iter);
  202. /**
  203. * Function type declaration for UCharIterator.reservedFn().
  204. * Reserved for future use.
  205. *
  206. * @param iter the UCharIterator structure ("this pointer")
  207. * @param something some integer argument
  208. * @return some integer
  209. *
  210. * @see UCharIterator
  211. * @stable ICU 2.1
  212. */
  213. typedef int32_t U_CALLCONV
  214. UCharIteratorReserved(UCharIterator *iter, int32_t something);
  215. /**
  216. * Function type declaration for UCharIterator.getState().
  217. *
  218. * Get the "state" of the iterator in the form of a single 32-bit word.
  219. * It is recommended that the state value be calculated to be as small as
  220. * is feasible. For strings with limited lengths, fewer than 32 bits may
  221. * be sufficient.
  222. *
  223. * This is used together with setState()/UCharIteratorSetState
  224. * to save and restore the iterator position more efficiently than with
  225. * getIndex()/move().
  226. *
  227. * The iterator state is defined as a uint32_t value because it is designed
  228. * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state
  229. * of the character iterator.
  230. *
  231. * With some UCharIterator implementations (e.g., UTF-8),
  232. * getting and setting the UTF-16 index with existing functions
  233. * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
  234. * relatively slow because the iterator has to "walk" from a known index
  235. * to the requested one.
  236. * This takes more time the farther it needs to go.
  237. *
  238. * An opaque state value allows an iterator implementation to provide
  239. * an internal index (UTF-8: the source byte array index) for
  240. * fast, constant-time restoration.
  241. *
  242. * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
  243. * the UTF-16 index may not be restored as well, but the iterator can deliver
  244. * the correct text contents and move relative to the current position
  245. * without performance degradation.
  246. *
  247. * Some UCharIterator implementations may not be able to return
  248. * a valid state for each position, in which case they return UITER_NO_STATE instead.
  249. * This will be clearly documented for each such iterator (none of the public ones here).
  250. *
  251. * @param iter the UCharIterator structure ("this pointer")
  252. * @return the state word
  253. *
  254. * @see UCharIterator
  255. * @see UCharIteratorSetState
  256. * @see UITER_NO_STATE
  257. * @stable ICU 2.6
  258. */
  259. typedef uint32_t U_CALLCONV
  260. UCharIteratorGetState(const UCharIterator *iter);
  261. /**
  262. * Function type declaration for UCharIterator.setState().
  263. *
  264. * Restore the "state" of the iterator using a state word from a getState() call.
  265. * The iterator object need not be the same one on which getState() was called,
  266. * but it must be of the same type (set up using the same uiter_setXYZ function)
  267. * and it must iterate over the same string
  268. * (binary identical regardless of memory address).
  269. * For more about the state word see UCharIteratorGetState.
  270. *
  271. * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
  272. * the UTF-16 index may not be restored as well, but the iterator can deliver
  273. * the correct text contents and move relative to the current position
  274. * without performance degradation.
  275. *
  276. * @param iter the UCharIterator structure ("this pointer")
  277. * @param state the state word from a getState() call
  278. * on a same-type, same-string iterator
  279. * @param pErrorCode Must be a valid pointer to an error code value,
  280. * which must not indicate a failure before the function call.
  281. *
  282. * @see UCharIterator
  283. * @see UCharIteratorGetState
  284. * @stable ICU 2.6
  285. */
  286. typedef void U_CALLCONV
  287. UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
  288. /**
  289. * C API for code unit iteration.
  290. * This can be used as a C wrapper around
  291. * CharacterIterator, Replaceable, or implemented using simple strings, etc.
  292. *
  293. * There are two roles for using UCharIterator:
  294. *
  295. * A "provider" sets the necessary function pointers and controls the "protected"
  296. * fields of the UCharIterator structure. A "provider" passes a UCharIterator
  297. * into C APIs that need a UCharIterator as an abstract, flexible string interface.
  298. *
  299. * Implementations of such C APIs are "callers" of UCharIterator functions;
  300. * they only use the "public" function pointers and never access the "protected"
  301. * fields directly.
  302. *
  303. * The current() and next() functions only check the current index against the
  304. * limit, and previous() only checks the current index against the start,
  305. * to see if the iterator already reached the end of the iteration range.
  306. *
  307. * The assumption - in all iterators - is that the index is moved via the API,
  308. * which means it won't go out of bounds, or the index is modified by
  309. * user code that knows enough about the iterator implementation to set valid
  310. * index values.
  311. *
  312. * UCharIterator functions return code unit values 0..0xffff,
  313. * or U_SENTINEL if the iteration bounds are reached.
  314. *
  315. * @stable ICU 2.1
  316. */
  317. struct UCharIterator {
  318. /**
  319. * (protected) Pointer to string or wrapped object or similar.
  320. * Not used by caller.
  321. * @stable ICU 2.1
  322. */
  323. const void *context;
  324. /**
  325. * (protected) Length of string or similar.
  326. * Not used by caller.
  327. * @stable ICU 2.1
  328. */
  329. int32_t length;
  330. /**
  331. * (protected) Start index or similar.
  332. * Not used by caller.
  333. * @stable ICU 2.1
  334. */
  335. int32_t start;
  336. /**
  337. * (protected) Current index or similar.
  338. * Not used by caller.
  339. * @stable ICU 2.1
  340. */
  341. int32_t index;
  342. /**
  343. * (protected) Limit index or similar.
  344. * Not used by caller.
  345. * @stable ICU 2.1
  346. */
  347. int32_t limit;
  348. /**
  349. * (protected) Used by UTF-8 iterators and possibly others.
  350. * @stable ICU 2.1
  351. */
  352. int32_t reservedField;
  353. /**
  354. * (public) Returns the current position or the
  355. * start or limit index of the iteration range.
  356. *
  357. * @see UCharIteratorGetIndex
  358. * @stable ICU 2.1
  359. */
  360. UCharIteratorGetIndex *getIndex;
  361. /**
  362. * (public) Moves the current position relative to the start or limit of the
  363. * iteration range, or relative to the current position itself.
  364. * The movement is expressed in numbers of code units forward
  365. * or backward by specifying a positive or negative delta.
  366. *
  367. * @see UCharIteratorMove
  368. * @stable ICU 2.1
  369. */
  370. UCharIteratorMove *move;
  371. /**
  372. * (public) Check if current() and next() can still
  373. * return another code unit.
  374. *
  375. * @see UCharIteratorHasNext
  376. * @stable ICU 2.1
  377. */
  378. UCharIteratorHasNext *hasNext;
  379. /**
  380. * (public) Check if previous() can still return another code unit.
  381. *
  382. * @see UCharIteratorHasPrevious
  383. * @stable ICU 2.1
  384. */
  385. UCharIteratorHasPrevious *hasPrevious;
  386. /**
  387. * (public) Return the code unit at the current position,
  388. * or U_SENTINEL if there is none (index is at the limit).
  389. *
  390. * @see UCharIteratorCurrent
  391. * @stable ICU 2.1
  392. */
  393. UCharIteratorCurrent *current;
  394. /**
  395. * (public) Return the code unit at the current index and increment
  396. * the index (post-increment, like s[i++]),
  397. * or return U_SENTINEL if there is none (index is at the limit).
  398. *
  399. * @see UCharIteratorNext
  400. * @stable ICU 2.1
  401. */
  402. UCharIteratorNext *next;
  403. /**
  404. * (public) Decrement the index and return the code unit from there
  405. * (pre-decrement, like s[--i]),
  406. * or return U_SENTINEL if there is none (index is at the start).
  407. *
  408. * @see UCharIteratorPrevious
  409. * @stable ICU 2.1
  410. */
  411. UCharIteratorPrevious *previous;
  412. /**
  413. * (public) Reserved for future use. Currently NULL.
  414. *
  415. * @see UCharIteratorReserved
  416. * @stable ICU 2.1
  417. */
  418. UCharIteratorReserved *reservedFn;
  419. /**
  420. * (public) Return the state of the iterator, to be restored later with setState().
  421. * This function pointer is NULL if the iterator does not implement it.
  422. *
  423. * @see UCharIteratorGetState
  424. * @stable ICU 2.6
  425. */
  426. UCharIteratorGetState *getState;
  427. /**
  428. * (public) Restore the iterator state from the state word from a call
  429. * to getState().
  430. * This function pointer is NULL if the iterator does not implement it.
  431. *
  432. * @see UCharIteratorSetState
  433. * @stable ICU 2.6
  434. */
  435. UCharIteratorSetState *setState;
  436. };
  437. /**
  438. * Helper function for UCharIterator to get the code point
  439. * at the current index.
  440. *
  441. * Return the code point that includes the code unit at the current position,
  442. * or U_SENTINEL if there is none (index is at the limit).
  443. * If the current code unit is a lead or trail surrogate,
  444. * then the following or preceding surrogate is used to form
  445. * the code point value.
  446. *
  447. * @param iter the UCharIterator structure ("this pointer")
  448. * @return the current code point
  449. *
  450. * @see UCharIterator
  451. * @see U16_GET
  452. * @see UnicodeString::char32At()
  453. * @stable ICU 2.1
  454. */
  455. U_CAPI UChar32 U_EXPORT2
  456. uiter_current32(UCharIterator *iter);
  457. /**
  458. * Helper function for UCharIterator to get the next code point.
  459. *
  460. * Return the code point at the current index and increment
  461. * the index (post-increment, like s[i++]),
  462. * or return U_SENTINEL if there is none (index is at the limit).
  463. *
  464. * @param iter the UCharIterator structure ("this pointer")
  465. * @return the current code point (and post-increment the current index)
  466. *
  467. * @see UCharIterator
  468. * @see U16_NEXT
  469. * @stable ICU 2.1
  470. */
  471. U_CAPI UChar32 U_EXPORT2
  472. uiter_next32(UCharIterator *iter);
  473. /**
  474. * Helper function for UCharIterator to get the previous code point.
  475. *
  476. * Decrement the index and return the code point from there
  477. * (pre-decrement, like s[--i]),
  478. * or return U_SENTINEL if there is none (index is at the start).
  479. *
  480. * @param iter the UCharIterator structure ("this pointer")
  481. * @return the previous code point (after pre-decrementing the current index)
  482. *
  483. * @see UCharIterator
  484. * @see U16_PREV
  485. * @stable ICU 2.1
  486. */
  487. U_CAPI UChar32 U_EXPORT2
  488. uiter_previous32(UCharIterator *iter);
  489. /**
  490. * Get the "state" of the iterator in the form of a single 32-bit word.
  491. * This is a convenience function that calls iter->getState(iter)
  492. * if iter->getState is not NULL;
  493. * if it is NULL or any other error occurs, then UITER_NO_STATE is returned.
  494. *
  495. * Some UCharIterator implementations may not be able to return
  496. * a valid state for each position, in which case they return UITER_NO_STATE instead.
  497. * This will be clearly documented for each such iterator (none of the public ones here).
  498. *
  499. * @param iter the UCharIterator structure ("this pointer")
  500. * @return the state word
  501. *
  502. * @see UCharIterator
  503. * @see UCharIteratorGetState
  504. * @see UITER_NO_STATE
  505. * @stable ICU 2.6
  506. */
  507. U_CAPI uint32_t U_EXPORT2
  508. uiter_getState(const UCharIterator *iter);
  509. /**
  510. * Restore the "state" of the iterator using a state word from a getState() call.
  511. * This is a convenience function that calls iter->setState(iter, state, pErrorCode)
  512. * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set.
  513. *
  514. * @param iter the UCharIterator structure ("this pointer")
  515. * @param state the state word from a getState() call
  516. * on a same-type, same-string iterator
  517. * @param pErrorCode Must be a valid pointer to an error code value,
  518. * which must not indicate a failure before the function call.
  519. *
  520. * @see UCharIterator
  521. * @see UCharIteratorSetState
  522. * @stable ICU 2.6
  523. */
  524. U_CAPI void U_EXPORT2
  525. uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
  526. /**
  527. * Set up a UCharIterator to iterate over a string.
  528. *
  529. * Sets the UCharIterator function pointers for iteration over the string s
  530. * with iteration boundaries start=index=0 and length=limit=string length.
  531. * The "provider" may set the start, index, and limit values at any time
  532. * within the range 0..length.
  533. * The length field will be ignored.
  534. *
  535. * The string pointer s is set into UCharIterator.context without copying
  536. * or reallocating the string contents.
  537. *
  538. * getState() simply returns the current index.
  539. * move() will always return the final index.
  540. *
  541. * @param iter UCharIterator structure to be set for iteration
  542. * @param s String to iterate over
  543. * @param length Length of s, or -1 if NUL-terminated
  544. *
  545. * @see UCharIterator
  546. * @stable ICU 2.1
  547. */
  548. U_CAPI void U_EXPORT2
  549. uiter_setString(UCharIterator *iter, const UChar *s, int32_t length);
  550. /**
  551. * Set up a UCharIterator to iterate over a UTF-16BE string
  552. * (byte vector with a big-endian pair of bytes per UChar).
  553. *
  554. * Everything works just like with a normal UChar iterator (uiter_setString),
  555. * except that UChars are assembled from byte pairs,
  556. * and that the length argument here indicates an even number of bytes.
  557. *
  558. * getState() simply returns the current index.
  559. * move() will always return the final index.
  560. *
  561. * @param iter UCharIterator structure to be set for iteration
  562. * @param s UTF-16BE string to iterate over
  563. * @param length Length of s as an even number of bytes, or -1 if NUL-terminated
  564. * (NUL means pair of 0 bytes at even index from s)
  565. *
  566. * @see UCharIterator
  567. * @see uiter_setString
  568. * @stable ICU 2.6
  569. */
  570. U_CAPI void U_EXPORT2
  571. uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length);
  572. /**
  573. * Set up a UCharIterator to iterate over a UTF-8 string.
  574. *
  575. * Sets the UCharIterator function pointers for iteration over the UTF-8 string s
  576. * with UTF-8 iteration boundaries 0 and length.
  577. * The implementation counts the UTF-16 index on the fly and
  578. * lazily evaluates the UTF-16 length of the text.
  579. *
  580. * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length.
  581. * When the reservedField is not 0, then it contains a supplementary code point
  582. * and the UTF-16 index is between the two corresponding surrogates.
  583. * At that point, the UTF-8 index is behind that code point.
  584. *
  585. * The UTF-8 string pointer s is set into UCharIterator.context without copying
  586. * or reallocating the string contents.
  587. *
  588. * getState() returns a state value consisting of
  589. * - the current UTF-8 source byte index (bits 31..1)
  590. * - a flag (bit 0) that indicates whether the UChar position is in the middle
  591. * of a surrogate pair
  592. * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point)
  593. *
  594. * getState() cannot also encode the UTF-16 index in the state value.
  595. * move(relative to limit or length), or
  596. * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX.
  597. *
  598. * @param iter UCharIterator structure to be set for iteration
  599. * @param s UTF-8 string to iterate over
  600. * @param length Length of s in bytes, or -1 if NUL-terminated
  601. *
  602. * @see UCharIterator
  603. * @stable ICU 2.6
  604. */
  605. U_CAPI void U_EXPORT2
  606. uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length);
  607. #if U_SHOW_CPLUSPLUS_API
  608. /**
  609. * Set up a UCharIterator to wrap around a C++ CharacterIterator.
  610. *
  611. * Sets the UCharIterator function pointers for iteration using the
  612. * CharacterIterator charIter.
  613. *
  614. * The CharacterIterator pointer charIter is set into UCharIterator.context
  615. * without copying or cloning the CharacterIterator object.
  616. * The other "protected" UCharIterator fields are set to 0 and will be ignored.
  617. * The iteration index and boundaries are controlled by the CharacterIterator.
  618. *
  619. * getState() simply returns the current index.
  620. * move() will always return the final index.
  621. *
  622. * @param iter UCharIterator structure to be set for iteration
  623. * @param charIter CharacterIterator to wrap
  624. *
  625. * @see UCharIterator
  626. * @stable ICU 2.1
  627. */
  628. U_CAPI void U_EXPORT2
  629. uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter);
  630. /**
  631. * Set up a UCharIterator to iterate over a C++ Replaceable.
  632. *
  633. * Sets the UCharIterator function pointers for iteration over the
  634. * Replaceable rep with iteration boundaries start=index=0 and
  635. * length=limit=rep->length().
  636. * The "provider" may set the start, index, and limit values at any time
  637. * within the range 0..length=rep->length().
  638. * The length field will be ignored.
  639. *
  640. * The Replaceable pointer rep is set into UCharIterator.context without copying
  641. * or cloning/reallocating the Replaceable object.
  642. *
  643. * getState() simply returns the current index.
  644. * move() will always return the final index.
  645. *
  646. * @param iter UCharIterator structure to be set for iteration
  647. * @param rep Replaceable to iterate over
  648. *
  649. * @see UCharIterator
  650. * @stable ICU 2.1
  651. */
  652. U_CAPI void U_EXPORT2
  653. uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep);
  654. #endif
  655. U_CDECL_END
  656. #endif