ustr_titlecase_brkiter.cpp 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: ustr_titlecase_brkiter.cpp
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 2011may30
* created by: Markus W. Scherer
*
* Titlecasing functions that are based on BreakIterator
* were moved here to break dependency cycles among parts of the common library.
*/
  19. #include "unicode/utypes.h"
  20. #if !UCONFIG_NO_BREAK_ITERATION
  21. #include "unicode/brkiter.h"
  22. #include "unicode/casemap.h"
  23. #include "unicode/chariter.h"
  24. #include "unicode/localpointer.h"
  25. #include "unicode/ubrk.h"
  26. #include "unicode/ucasemap.h"
  27. #include "unicode/utext.h"
  28. #include "cmemory.h"
  29. #include "uassert.h"
  30. #include "ucase.h"
  31. #include "ucasemap_imp.h"
  32. U_NAMESPACE_BEGIN
  33. /**
  34. * Whole-string BreakIterator.
  35. * Titlecasing only calls setText(), first(), and next().
  36. * We implement the rest only to satisfy the abstract interface.
  37. */
  38. class WholeStringBreakIterator : public BreakIterator {
  39. public:
  40. WholeStringBreakIterator() : BreakIterator(), length(0) {}
  41. ~WholeStringBreakIterator() override;
  42. bool operator==(const BreakIterator&) const override;
  43. WholeStringBreakIterator *clone() const override;
  44. static UClassID U_EXPORT2 getStaticClassID();
  45. UClassID getDynamicClassID() const override;
  46. CharacterIterator &getText() const override;
  47. UText *getUText(UText *fillIn, UErrorCode &errorCode) const override;
  48. void setText(const UnicodeString &text) override;
  49. void setText(UText *text, UErrorCode &errorCode) override;
  50. void adoptText(CharacterIterator* it) override;
  51. int32_t first() override;
  52. int32_t last() override;
  53. int32_t previous() override;
  54. int32_t next() override;
  55. int32_t current() const override;
  56. int32_t following(int32_t offset) override;
  57. int32_t preceding(int32_t offset) override;
  58. UBool isBoundary(int32_t offset) override;
  59. int32_t next(int32_t n) override;
  60. WholeStringBreakIterator *createBufferClone(void *stackBuffer, int32_t &BufferSize,
  61. UErrorCode &errorCode) override;
  62. WholeStringBreakIterator &refreshInputText(UText *input, UErrorCode &errorCode) override;
  63. private:
  64. int32_t length;
  65. };
  66. UOBJECT_DEFINE_RTTI_IMPLEMENTATION(WholeStringBreakIterator)
  67. WholeStringBreakIterator::~WholeStringBreakIterator() {}
  68. bool WholeStringBreakIterator::operator==(const BreakIterator&) const { return false; }
  69. WholeStringBreakIterator *WholeStringBreakIterator::clone() const { return nullptr; }
  70. CharacterIterator &WholeStringBreakIterator::getText() const {
  71. UPRV_UNREACHABLE_EXIT; // really should not be called
  72. }
  73. UText *WholeStringBreakIterator::getUText(UText * /*fillIn*/, UErrorCode &errorCode) const {
  74. if (U_SUCCESS(errorCode)) {
  75. errorCode = U_UNSUPPORTED_ERROR;
  76. }
  77. return nullptr;
  78. }
  79. void WholeStringBreakIterator::setText(const UnicodeString &text) {
  80. length = text.length();
  81. }
  82. void WholeStringBreakIterator::setText(UText *text, UErrorCode &errorCode) {
  83. if (U_SUCCESS(errorCode)) {
  84. int64_t length64 = utext_nativeLength(text);
  85. if (length64 <= INT32_MAX) {
  86. length = (int32_t)length64;
  87. } else {
  88. errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
  89. }
  90. }
  91. }
  92. void WholeStringBreakIterator::adoptText(CharacterIterator*) {
  93. UPRV_UNREACHABLE_EXIT; // should not be called
  94. }
  95. int32_t WholeStringBreakIterator::first() { return 0; }
  96. int32_t WholeStringBreakIterator::last() { return length; }
  97. int32_t WholeStringBreakIterator::previous() { return 0; }
  98. int32_t WholeStringBreakIterator::next() { return length; }
  99. int32_t WholeStringBreakIterator::current() const { return 0; }
  100. int32_t WholeStringBreakIterator::following(int32_t /*offset*/) { return length; }
  101. int32_t WholeStringBreakIterator::preceding(int32_t /*offset*/) { return 0; }
  102. UBool WholeStringBreakIterator::isBoundary(int32_t /*offset*/) { return false; }
  103. int32_t WholeStringBreakIterator::next(int32_t /*n*/) { return length; }
  104. WholeStringBreakIterator *WholeStringBreakIterator::createBufferClone(
  105. void * /*stackBuffer*/, int32_t & /*BufferSize*/, UErrorCode &errorCode) {
  106. if (U_SUCCESS(errorCode)) {
  107. errorCode = U_UNSUPPORTED_ERROR;
  108. }
  109. return nullptr;
  110. }
  111. WholeStringBreakIterator &WholeStringBreakIterator::refreshInputText(
  112. UText * /*input*/, UErrorCode &errorCode) {
  113. if (U_SUCCESS(errorCode)) {
  114. errorCode = U_UNSUPPORTED_ERROR;
  115. }
  116. return *this;
  117. }
  118. U_CFUNC
  119. BreakIterator *ustrcase_getTitleBreakIterator(
  120. const Locale *locale, const char *locID, uint32_t options, BreakIterator *iter,
  121. LocalPointer<BreakIterator> &ownedIter, UErrorCode &errorCode) {
  122. if (U_FAILURE(errorCode)) { return nullptr; }
  123. options &= U_TITLECASE_ITERATOR_MASK;
  124. if (options != 0 && iter != nullptr) {
  125. errorCode = U_ILLEGAL_ARGUMENT_ERROR;
  126. return nullptr;
  127. }
  128. if (iter == nullptr) {
  129. switch (options) {
  130. case 0:
  131. iter = BreakIterator::createWordInstance(
  132. locale != nullptr ? *locale : Locale(locID), errorCode);
  133. break;
  134. case U_TITLECASE_WHOLE_STRING:
  135. iter = new WholeStringBreakIterator();
  136. if (iter == nullptr) {
  137. errorCode = U_MEMORY_ALLOCATION_ERROR;
  138. }
  139. break;
  140. case U_TITLECASE_SENTENCES:
  141. iter = BreakIterator::createSentenceInstance(
  142. locale != nullptr ? *locale : Locale(locID), errorCode);
  143. break;
  144. default:
  145. errorCode = U_ILLEGAL_ARGUMENT_ERROR;
  146. break;
  147. }
  148. ownedIter.adoptInstead(iter);
  149. }
  150. return iter;
  151. }
  152. int32_t CaseMap::toTitle(
  153. const char *locale, uint32_t options, BreakIterator *iter,
  154. const char16_t *src, int32_t srcLength,
  155. char16_t *dest, int32_t destCapacity, Edits *edits,
  156. UErrorCode &errorCode) {
  157. LocalPointer<BreakIterator> ownedIter;
  158. iter = ustrcase_getTitleBreakIterator(nullptr, locale, options, iter, ownedIter, errorCode);
  159. if(iter==nullptr) {
  160. return 0;
  161. }
  162. UnicodeString s(srcLength<0, src, srcLength);
  163. iter->setText(s);
  164. return ustrcase_map(
  165. ustrcase_getCaseLocale(locale), options, iter,
  166. dest, destCapacity,
  167. src, srcLength,
  168. ustrcase_internalToTitle, edits, errorCode);
  169. }
  170. U_NAMESPACE_END
  171. U_NAMESPACE_USE
  172. U_CAPI int32_t U_EXPORT2
  173. u_strToTitle(char16_t *dest, int32_t destCapacity,
  174. const char16_t *src, int32_t srcLength,
  175. UBreakIterator *titleIter,
  176. const char *locale,
  177. UErrorCode *pErrorCode) {
  178. LocalPointer<BreakIterator> ownedIter;
  179. BreakIterator *iter = ustrcase_getTitleBreakIterator(
  180. nullptr, locale, 0, reinterpret_cast<BreakIterator *>(titleIter),
  181. ownedIter, *pErrorCode);
  182. if (iter == nullptr) {
  183. return 0;
  184. }
  185. UnicodeString s(srcLength<0, src, srcLength);
  186. iter->setText(s);
  187. return ustrcase_mapWithOverlap(
  188. ustrcase_getCaseLocale(locale), 0, iter,
  189. dest, destCapacity,
  190. src, srcLength,
  191. ustrcase_internalToTitle, *pErrorCode);
  192. }
  193. U_CAPI int32_t U_EXPORT2
  194. ucasemap_toTitle(UCaseMap *csm,
  195. char16_t *dest, int32_t destCapacity,
  196. const char16_t *src, int32_t srcLength,
  197. UErrorCode *pErrorCode) {
  198. if (U_FAILURE(*pErrorCode)) {
  199. return 0;
  200. }
  201. if (csm->iter == nullptr) {
  202. LocalPointer<BreakIterator> ownedIter;
  203. BreakIterator *iter = ustrcase_getTitleBreakIterator(
  204. nullptr, csm->locale, csm->options, nullptr, ownedIter, *pErrorCode);
  205. if (iter == nullptr) {
  206. return 0;
  207. }
  208. csm->iter = ownedIter.orphan();
  209. }
  210. UnicodeString s(srcLength<0, src, srcLength);
  211. csm->iter->setText(s);
  212. return ustrcase_map(
  213. csm->caseLocale, csm->options, csm->iter,
  214. dest, destCapacity,
  215. src, srcLength,
  216. ustrcase_internalToTitle, nullptr, *pErrorCode);
  217. }
  218. #endif // !UCONFIG_NO_BREAK_ITERATION