search.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. **********************************************************************
  5. * Copyright (C) 2001-2008,2010 IBM and others. All rights reserved.
  6. **********************************************************************
  7. * Date Name Description
  8. * 03/22/2000 helena Creation.
  9. **********************************************************************
  10. */
  11. #include "unicode/utypes.h"
  12. #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
  13. #include "unicode/brkiter.h"
  14. #include "unicode/schriter.h"
  15. #include "unicode/search.h"
  16. #include "usrchimp.h"
  17. #include "cmemory.h"
  18. // public constructors and destructors -----------------------------------
  19. U_NAMESPACE_BEGIN
  20. SearchIterator::SearchIterator(const SearchIterator &other)
  21. : UObject(other)
  22. {
  23. m_breakiterator_ = other.m_breakiterator_;
  24. m_text_ = other.m_text_;
  25. m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
  26. m_search_->breakIter = other.m_search_->breakIter;
  27. m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch;
  28. m_search_->isOverlap = other.m_search_->isOverlap;
  29. m_search_->elementComparisonType = other.m_search_->elementComparisonType;
  30. m_search_->matchedIndex = other.m_search_->matchedIndex;
  31. m_search_->matchedLength = other.m_search_->matchedLength;
  32. m_search_->text = other.m_search_->text;
  33. m_search_->textLength = other.m_search_->textLength;
  34. }
  35. SearchIterator::~SearchIterator()
  36. {
  37. if (m_search_ != nullptr) {
  38. uprv_free(m_search_);
  39. }
  40. }
  41. // public get and set methods ----------------------------------------
  42. void SearchIterator::setAttribute(USearchAttribute attribute,
  43. USearchAttributeValue value,
  44. UErrorCode &status)
  45. {
  46. if (U_SUCCESS(status)) {
  47. switch (attribute)
  48. {
  49. case USEARCH_OVERLAP :
  50. m_search_->isOverlap = (value == USEARCH_ON ? true : false);
  51. break;
  52. case USEARCH_CANONICAL_MATCH :
  53. m_search_->isCanonicalMatch = (value == USEARCH_ON ? true : false);
  54. break;
  55. case USEARCH_ELEMENT_COMPARISON :
  56. if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
  57. m_search_->elementComparisonType = (int16_t)value;
  58. } else {
  59. m_search_->elementComparisonType = 0;
  60. }
  61. break;
  62. default:
  63. status = U_ILLEGAL_ARGUMENT_ERROR;
  64. }
  65. }
  66. if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
  67. status = U_ILLEGAL_ARGUMENT_ERROR;
  68. }
  69. }
  70. USearchAttributeValue SearchIterator::getAttribute(
  71. USearchAttribute attribute) const
  72. {
  73. switch (attribute) {
  74. case USEARCH_OVERLAP :
  75. return (m_search_->isOverlap ? USEARCH_ON : USEARCH_OFF);
  76. case USEARCH_CANONICAL_MATCH :
  77. return (m_search_->isCanonicalMatch ? USEARCH_ON : USEARCH_OFF);
  78. case USEARCH_ELEMENT_COMPARISON :
  79. {
  80. int16_t value = m_search_->elementComparisonType;
  81. if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
  82. return (USearchAttributeValue)value;
  83. } else {
  84. return USEARCH_STANDARD_ELEMENT_COMPARISON;
  85. }
  86. }
  87. default :
  88. return USEARCH_DEFAULT;
  89. }
  90. }
  91. int32_t SearchIterator::getMatchedStart() const
  92. {
  93. return m_search_->matchedIndex;
  94. }
  95. int32_t SearchIterator::getMatchedLength() const
  96. {
  97. return m_search_->matchedLength;
  98. }
  99. void SearchIterator::getMatchedText(UnicodeString &result) const
  100. {
  101. int32_t matchedindex = m_search_->matchedIndex;
  102. int32_t matchedlength = m_search_->matchedLength;
  103. if (matchedindex != USEARCH_DONE && matchedlength != 0) {
  104. result.setTo(m_search_->text + matchedindex, matchedlength);
  105. }
  106. else {
  107. result.remove();
  108. }
  109. }
  110. void SearchIterator::setBreakIterator(BreakIterator *breakiter,
  111. UErrorCode &status)
  112. {
  113. if (U_SUCCESS(status)) {
  114. #if 0
  115. m_search_->breakIter = nullptr;
  116. // the c++ breakiterator may not make use of ubreakiterator.
  117. // so we'll have to keep track of it ourselves.
  118. #else
  119. // Well, gee... the Constructors that take a BreakIterator
  120. // all cast the BreakIterator to a UBreakIterator and
  121. // pass it to the corresponding usearch_openFromXXX
  122. // routine, so there's no reason not to do this.
  123. //
  124. // Besides, a UBreakIterator is a BreakIterator, so
  125. // any subclass of BreakIterator should work fine here...
  126. m_search_->breakIter = (UBreakIterator *) breakiter;
  127. #endif
  128. m_breakiterator_ = breakiter;
  129. }
  130. }
  131. const BreakIterator * SearchIterator::getBreakIterator() const
  132. {
  133. return m_breakiterator_;
  134. }
  135. void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)
  136. {
  137. if (U_SUCCESS(status)) {
  138. if (text.length() == 0) {
  139. status = U_ILLEGAL_ARGUMENT_ERROR;
  140. }
  141. else {
  142. m_text_ = text;
  143. m_search_->text = m_text_.getBuffer();
  144. m_search_->textLength = m_text_.length();
  145. }
  146. }
  147. }
  148. void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
  149. {
  150. if (U_SUCCESS(status)) {
  151. text.getText(m_text_);
  152. setText(m_text_, status);
  153. }
  154. }
  155. const UnicodeString & SearchIterator::getText() const
  156. {
  157. return m_text_;
  158. }
  159. // operator overloading ----------------------------------------------
  160. bool SearchIterator::operator==(const SearchIterator &that) const
  161. {
  162. if (this == &that) {
  163. return true;
  164. }
  165. return (m_breakiterator_ == that.m_breakiterator_ &&
  166. m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch &&
  167. m_search_->isOverlap == that.m_search_->isOverlap &&
  168. m_search_->elementComparisonType == that.m_search_->elementComparisonType &&
  169. m_search_->matchedIndex == that.m_search_->matchedIndex &&
  170. m_search_->matchedLength == that.m_search_->matchedLength &&
  171. m_search_->textLength == that.m_search_->textLength &&
  172. getOffset() == that.getOffset() &&
  173. (m_search_->textLength == 0 ||
  174. (uprv_memcmp(m_search_->text, that.m_search_->text,
  175. m_search_->textLength * sizeof(char16_t)) == 0)));
  176. }
  177. // public methods ----------------------------------------------------
  178. int32_t SearchIterator::first(UErrorCode &status)
  179. {
  180. if (U_FAILURE(status)) {
  181. return USEARCH_DONE;
  182. }
  183. setOffset(0, status);
  184. return handleNext(0, status);
  185. }
  186. int32_t SearchIterator::following(int32_t position,
  187. UErrorCode &status)
  188. {
  189. if (U_FAILURE(status)) {
  190. return USEARCH_DONE;
  191. }
  192. setOffset(position, status);
  193. return handleNext(position, status);
  194. }
  195. int32_t SearchIterator::last(UErrorCode &status)
  196. {
  197. if (U_FAILURE(status)) {
  198. return USEARCH_DONE;
  199. }
  200. setOffset(m_search_->textLength, status);
  201. return handlePrev(m_search_->textLength, status);
  202. }
  203. int32_t SearchIterator::preceding(int32_t position,
  204. UErrorCode &status)
  205. {
  206. if (U_FAILURE(status)) {
  207. return USEARCH_DONE;
  208. }
  209. setOffset(position, status);
  210. return handlePrev(position, status);
  211. }
  212. int32_t SearchIterator::next(UErrorCode &status)
  213. {
  214. if (U_SUCCESS(status)) {
  215. int32_t offset = getOffset();
  216. int32_t matchindex = m_search_->matchedIndex;
  217. int32_t matchlength = m_search_->matchedLength;
  218. m_search_->reset = false;
  219. if (m_search_->isForwardSearching) {
  220. int32_t textlength = m_search_->textLength;
  221. if (offset == textlength || matchindex == textlength ||
  222. (matchindex != USEARCH_DONE &&
  223. matchindex + matchlength >= textlength)) {
  224. // not enough characters to match
  225. setMatchNotFound();
  226. return USEARCH_DONE;
  227. }
  228. }
  229. else {
  230. // switching direction.
  231. // if matchedIndex == USEARCH_DONE, it means that either a
  232. // setOffset has been called or that previous ran off the text
  233. // string. the iterator would have been set to offset 0 if a
  234. // match is not found.
  235. m_search_->isForwardSearching = true;
  236. if (m_search_->matchedIndex != USEARCH_DONE) {
  237. // there's no need to set the collation element iterator
  238. // the next call to next will set the offset.
  239. return matchindex;
  240. }
  241. }
  242. if (matchlength > 0) {
  243. // if matchlength is 0 we are at the start of the iteration
  244. if (m_search_->isOverlap) {
  245. offset ++;
  246. }
  247. else {
  248. offset += matchlength;
  249. }
  250. }
  251. return handleNext(offset, status);
  252. }
  253. return USEARCH_DONE;
  254. }
  255. int32_t SearchIterator::previous(UErrorCode &status)
  256. {
  257. if (U_SUCCESS(status)) {
  258. int32_t offset;
  259. if (m_search_->reset) {
  260. offset = m_search_->textLength;
  261. m_search_->isForwardSearching = false;
  262. m_search_->reset = false;
  263. setOffset(offset, status);
  264. }
  265. else {
  266. offset = getOffset();
  267. }
  268. int32_t matchindex = m_search_->matchedIndex;
  269. if (m_search_->isForwardSearching) {
  270. // switching direction.
  271. // if matchedIndex == USEARCH_DONE, it means that either a
  272. // setOffset has been called or that next ran off the text
  273. // string. the iterator would have been set to offset textLength if
  274. // a match is not found.
  275. m_search_->isForwardSearching = false;
  276. if (matchindex != USEARCH_DONE) {
  277. return matchindex;
  278. }
  279. }
  280. else {
  281. if (offset == 0 || matchindex == 0) {
  282. // not enough characters to match
  283. setMatchNotFound();
  284. return USEARCH_DONE;
  285. }
  286. }
  287. if (matchindex != USEARCH_DONE) {
  288. if (m_search_->isOverlap) {
  289. matchindex += m_search_->matchedLength - 2;
  290. }
  291. return handlePrev(matchindex, status);
  292. }
  293. return handlePrev(offset, status);
  294. }
  295. return USEARCH_DONE;
  296. }
  297. void SearchIterator::reset()
  298. {
  299. UErrorCode status = U_ZERO_ERROR;
  300. setMatchNotFound();
  301. setOffset(0, status);
  302. m_search_->isOverlap = false;
  303. m_search_->isCanonicalMatch = false;
  304. m_search_->elementComparisonType = 0;
  305. m_search_->isForwardSearching = true;
  306. m_search_->reset = true;
  307. }
  308. // protected constructors and destructors -----------------------------
  309. SearchIterator::SearchIterator()
  310. {
  311. m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
  312. m_search_->breakIter = nullptr;
  313. m_search_->isOverlap = false;
  314. m_search_->isCanonicalMatch = false;
  315. m_search_->elementComparisonType = 0;
  316. m_search_->isForwardSearching = true;
  317. m_search_->reset = true;
  318. m_search_->matchedIndex = USEARCH_DONE;
  319. m_search_->matchedLength = 0;
  320. m_search_->text = nullptr;
  321. m_search_->textLength = 0;
  322. m_breakiterator_ = nullptr;
  323. }
  324. SearchIterator::SearchIterator(const UnicodeString &text,
  325. BreakIterator *breakiter) :
  326. m_breakiterator_(breakiter),
  327. m_text_(text)
  328. {
  329. m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
  330. m_search_->breakIter = nullptr;
  331. m_search_->isOverlap = false;
  332. m_search_->isCanonicalMatch = false;
  333. m_search_->elementComparisonType = 0;
  334. m_search_->isForwardSearching = true;
  335. m_search_->reset = true;
  336. m_search_->matchedIndex = USEARCH_DONE;
  337. m_search_->matchedLength = 0;
  338. m_search_->text = m_text_.getBuffer();
  339. m_search_->textLength = text.length();
  340. }
  341. SearchIterator::SearchIterator(CharacterIterator &text,
  342. BreakIterator *breakiter) :
  343. m_breakiterator_(breakiter)
  344. {
  345. m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
  346. m_search_->breakIter = nullptr;
  347. m_search_->isOverlap = false;
  348. m_search_->isCanonicalMatch = false;
  349. m_search_->elementComparisonType = 0;
  350. m_search_->isForwardSearching = true;
  351. m_search_->reset = true;
  352. m_search_->matchedIndex = USEARCH_DONE;
  353. m_search_->matchedLength = 0;
  354. text.getText(m_text_);
  355. m_search_->text = m_text_.getBuffer();
  356. m_search_->textLength = m_text_.length();
  357. m_breakiterator_ = breakiter;
  358. }
  359. // protected methods ------------------------------------------------------
  360. SearchIterator & SearchIterator::operator=(const SearchIterator &that)
  361. {
  362. if (this != &that) {
  363. m_breakiterator_ = that.m_breakiterator_;
  364. m_text_ = that.m_text_;
  365. m_search_->breakIter = that.m_search_->breakIter;
  366. m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch;
  367. m_search_->isOverlap = that.m_search_->isOverlap;
  368. m_search_->elementComparisonType = that.m_search_->elementComparisonType;
  369. m_search_->matchedIndex = that.m_search_->matchedIndex;
  370. m_search_->matchedLength = that.m_search_->matchedLength;
  371. m_search_->text = that.m_search_->text;
  372. m_search_->textLength = that.m_search_->textLength;
  373. }
  374. return *this;
  375. }
  376. void SearchIterator::setMatchLength(int32_t length)
  377. {
  378. m_search_->matchedLength = length;
  379. }
  380. void SearchIterator::setMatchStart(int32_t position)
  381. {
  382. m_search_->matchedIndex = position;
  383. }
  384. void SearchIterator::setMatchNotFound()
  385. {
  386. setMatchStart(USEARCH_DONE);
  387. setMatchLength(0);
  388. UErrorCode status = U_ZERO_ERROR;
  389. // by default no errors should be returned here since offsets are within
  390. // range.
  391. if (m_search_->isForwardSearching) {
  392. setOffset(m_search_->textLength, status);
  393. }
  394. else {
  395. setOffset(0, status);
  396. }
  397. }
  398. U_NAMESPACE_END
  399. #endif /* #if !UCONFIG_NO_COLLATION */