// usetiter.h (9.6 KB)
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. **********************************************************************
  5. * Copyright (c) 2002-2014, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. **********************************************************************
  8. */
  9. #ifndef USETITER_H
  10. #define USETITER_H
  11. #include "unicode/utypes.h"
  12. #if U_SHOW_CPLUSPLUS_API
  13. #include "unicode/uobject.h"
  14. #include "unicode/unistr.h"
  15. /**
  16. * \file
  17. * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
  18. */
  19. U_NAMESPACE_BEGIN
  20. class UnicodeSet;
  21. class UnicodeString;
  22. /**
  23. *
  24. * UnicodeSetIterator iterates over the contents of a UnicodeSet. It
  25. * iterates over either code points or code point ranges. After all
  26. * code points or ranges have been returned, it returns the
  27. * multicharacter strings of the UnicodeSet, if any.
  28. *
  29. * This class is not intended for public subclassing.
  30. *
  31. * <p>To iterate over code points and strings, use a loop like this:
  32. * <pre>
  33. * UnicodeSetIterator it(set);
  34. * while (it.next()) {
  35. * processItem(it.getString());
  36. * }
  37. * </pre>
  38. * <p>Each item in the set is accessed as a string. Set elements
  39. * consisting of single code points are returned as strings containing
  40. * just the one code point.
  41. *
  42. * <p>To iterate over code point ranges, instead of individual code points,
  43. * use a loop like this:
  44. * <pre>
  45. * UnicodeSetIterator it(set);
  46. * while (it.nextRange()) {
  47. * if (it.isString()) {
  48. * processString(it.getString());
  49. * } else {
  50. * processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
  51. * }
  52. * }
  53. * </pre>
  54. *
  55. * To iterate over only the strings, start with <code>skipToStrings()</code>.
  56. *
  57. * @author M. Davis
  58. * @stable ICU 2.4
  59. */
  60. class U_COMMON_API UnicodeSetIterator final : public UObject {
  61. /**
  62. * Value of <tt>codepoint</tt> if the iterator points to a string.
  63. * If <tt>codepoint == IS_STRING</tt>, then examine
  64. * <tt>string</tt> for the current iteration result.
  65. */
  66. enum { IS_STRING = -1 };
  67. /**
  68. * Current code point, or the special value <tt>IS_STRING</tt>, if
  69. * the iterator points to a string.
  70. */
  71. UChar32 codepoint;
  72. /**
  73. * When iterating over ranges using <tt>nextRange()</tt>,
  74. * <tt>codepointEnd</tt> contains the inclusive end of the
  75. * iteration range, if <tt>codepoint != IS_STRING</tt>. If
  76. * iterating over code points using <tt>next()</tt>, or if
  77. * <tt>codepoint == IS_STRING</tt>, then the value of
  78. * <tt>codepointEnd</tt> is undefined.
  79. */
  80. UChar32 codepointEnd;
  81. /**
  82. * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
  83. * to the current string. If <tt>codepoint != IS_STRING</tt>, the
  84. * value of <tt>string</tt> is undefined.
  85. */
  86. const UnicodeString* string;
  87. public:
  88. /**
  89. * Create an iterator over the given set. The iterator is valid
  90. * only so long as <tt>set</tt> is valid.
  91. * @param set set to iterate over
  92. * @stable ICU 2.4
  93. */
  94. UnicodeSetIterator(const UnicodeSet& set);
  95. /**
  96. * Create an iterator over nothing. <tt>next()</tt> and
  97. * <tt>nextRange()</tt> return false. This is a convenience
  98. * constructor allowing the target to be set later.
  99. * @stable ICU 2.4
  100. */
  101. UnicodeSetIterator();
  102. /**
  103. * Destructor.
  104. * @stable ICU 2.4
  105. */
  106. virtual ~UnicodeSetIterator();
  107. /**
  108. * Returns true if the current element is a string. If so, the
  109. * caller can retrieve it with <tt>getString()</tt>. If this
  110. * method returns false, the current element is a code point or
  111. * code point range, depending on whether <tt>next()</tt> or
  112. * <tt>nextRange()</tt> was called.
  113. * Elements of types string and codepoint can both be retrieved
  114. * with the function <tt>getString()</tt>.
  115. * Elements of type codepoint can also be retrieved with
  116. * <tt>getCodepoint()</tt>.
  117. * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint
  118. * of the range, and <tt>getCodepointEnd()</tt> returns the end
  119. * of the range.
  120. * @stable ICU 2.4
  121. */
  122. inline UBool isString() const;
  123. /**
  124. * Returns the current code point, if <tt>isString()</tt> returned
  125. * false. Otherwise returns an undefined result.
  126. * @stable ICU 2.4
  127. */
  128. inline UChar32 getCodepoint() const;
  129. /**
  130. * Returns the end of the current code point range, if
  131. * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
  132. * called. Otherwise returns an undefined result.
  133. * @stable ICU 2.4
  134. */
  135. inline UChar32 getCodepointEnd() const;
  136. /**
  137. * Returns the current string, if <tt>isString()</tt> returned
  138. * true. If the current iteration item is a code point, a UnicodeString
  139. * containing that single code point is returned.
  140. *
  141. * Ownership of the returned string remains with the iterator.
  142. * The string is guaranteed to remain valid only until the iterator is
  143. * advanced to the next item, or until the iterator is deleted.
  144. *
  145. * @stable ICU 2.4
  146. */
  147. const UnicodeString& getString();
  148. /**
  149. * Skips over the remaining code points/ranges, if any.
  150. * A following call to next() or nextRange() will yield a string, if there is one.
  151. * No-op if next() would return false, or if it would yield a string anyway.
  152. *
  153. * @return *this
  154. * @stable ICU 70
  155. * @see UnicodeSet#strings()
  156. */
  157. inline UnicodeSetIterator &skipToStrings() {
  158. // Finish code point/range iteration.
  159. range = endRange;
  160. endElement = -1;
  161. nextElement = 0;
  162. return *this;
  163. }
  164. /**
  165. * Advances the iteration position to the next element in the set,
  166. * which can be either a single code point or a string.
  167. * If there are no more elements in the set, return false.
  168. *
  169. * <p>
  170. * If <tt>isString() == true</tt>, the value is a
  171. * string, otherwise the value is a
  172. * single code point. Elements of either type can be retrieved
  173. * with the function <tt>getString()</tt>, while elements of
  174. * consisting of a single code point can be retrieved with
  175. * <tt>getCodepoint()</tt>
  176. *
  177. * <p>The order of iteration is all code points in sorted order,
  178. * followed by all strings sorted order. Do not mix
  179. * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
  180. * calling <tt>reset()</tt> between them. The results of doing so
  181. * are undefined.
  182. *
  183. * @return true if there was another element in the set.
  184. * @stable ICU 2.4
  185. */
  186. UBool next();
  187. /**
  188. * Returns the next element in the set, either a code point range
  189. * or a string. If there are no more elements in the set, return
  190. * false. If <tt>isString() == true</tt>, the value is a
  191. * string and can be accessed with <tt>getString()</tt>. Otherwise the value is a
  192. * range of one or more code points from <tt>getCodepoint()</tt> to
  193. * <tt>getCodepointeEnd()</tt> inclusive.
  194. *
  195. * <p>The order of iteration is all code points ranges in sorted
  196. * order, followed by all strings sorted order. Ranges are
  197. * disjoint and non-contiguous. The value returned from <tt>getString()</tt>
  198. * is undefined unless <tt>isString() == true</tt>. Do not mix calls to
  199. * <tt>next()</tt> and <tt>nextRange()</tt> without calling
  200. * <tt>reset()</tt> between them. The results of doing so are
  201. * undefined.
  202. *
  203. * @return true if there was another element in the set.
  204. * @stable ICU 2.4
  205. */
  206. UBool nextRange();
  207. /**
  208. * Sets this iterator to visit the elements of the given set and
  209. * resets it to the start of that set. The iterator is valid only
  210. * so long as <tt>set</tt> is valid.
  211. * @param set the set to iterate over.
  212. * @stable ICU 2.4
  213. */
  214. void reset(const UnicodeSet& set);
  215. /**
  216. * Resets this iterator to the start of the set.
  217. * @stable ICU 2.4
  218. */
  219. void reset();
  220. /**
  221. * ICU "poor man's RTTI", returns a UClassID for this class.
  222. *
  223. * @stable ICU 2.4
  224. */
  225. static UClassID U_EXPORT2 getStaticClassID();
  226. /**
  227. * ICU "poor man's RTTI", returns a UClassID for the actual class.
  228. *
  229. * @stable ICU 2.4
  230. */
  231. virtual UClassID getDynamicClassID() const override;
  232. // ======================= PRIVATES ===========================
  233. private:
  234. // endElement and nextElements are really UChar32's, but we keep
  235. // them as signed int32_t's so we can do comparisons with
  236. // endElement set to -1. Leave them as int32_t's.
  237. /** The set
  238. */
  239. const UnicodeSet* set;
  240. /** End range
  241. */
  242. int32_t endRange;
  243. /** Range
  244. */
  245. int32_t range;
  246. /** End element
  247. */
  248. int32_t endElement;
  249. /** Next element
  250. */
  251. int32_t nextElement;
  252. /** Next string
  253. */
  254. int32_t nextString;
  255. /** String count
  256. */
  257. int32_t stringCount;
  258. /**
  259. * Points to the string to use when the caller asks for a
  260. * string and the current iteration item is a code point, not a string.
  261. */
  262. UnicodeString *cpString;
  263. /** Copy constructor. Disallowed.
  264. */
  265. UnicodeSetIterator(const UnicodeSetIterator&) = delete;
  266. /** Assignment operator. Disallowed.
  267. */
  268. UnicodeSetIterator& operator=(const UnicodeSetIterator&) = delete;
  269. /** Load range
  270. */
  271. void loadRange(int32_t range);
  272. };
  273. inline UBool UnicodeSetIterator::isString() const {
  274. return codepoint < 0;
  275. }
  276. inline UChar32 UnicodeSetIterator::getCodepoint() const {
  277. return codepoint;
  278. }
  279. inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
  280. return codepointEnd;
  281. }
  282. U_NAMESPACE_END
  283. #endif /* U_SHOW_CPLUSPLUS_API */
  284. #endif