stringoptions.h 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. // © 2017 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. // stringoptions.h
  4. // created: 2017jun08 Markus W. Scherer
  5. #ifndef __STRINGOPTIONS_H__
  6. #define __STRINGOPTIONS_H__
  7. #include "unicode/utypes.h"
  8. /**
  9. * \file
  10. * \brief C API: Bit set option bit constants for various string and character processing functions.
  11. */
  12. /**
  13. * Option value for case folding: Use the default mappings defined in CaseFolding.txt.
  14. * This is the value 0, i.e., U_FOLD_CASE_EXCLUDE_SPECIAL_I is not set.
  15. * @stable ICU 2.0
  16. */
  17. #define U_FOLD_CASE_DEFAULT 0
  18. /**
  19. * Option value for case folding:
  20. *
  21. * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
  22. * and dotless i appropriately for Turkic languages (tr, az).
  23. *
  24. * Before Unicode 3.2, CaseFolding.txt contained mappings marked with 'I' that
  25. * were to be included for default mappings and
  26. * excluded for the Turkic-specific mappings.
  27. *
  28. * Starting with Unicode 3.2, CaseFolding.txt instead contains mappings marked with 'T' that
  29. * are to be excluded for default mappings and
  30. * included for the Turkic-specific mappings.
  31. *
  32. * @stable ICU 2.0
  33. */
  34. #define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
  35. /**
  36. * Titlecase the string as a whole rather than each word.
  37. * (Titlecase only the character at index 0, possibly adjusted.)
  38. * Option bits value for titlecasing APIs that take an options bit set.
  39. *
  40. * It is an error to specify multiple titlecasing iterator options together
  41. * (e.g., this one with U_TITLECASE_SENTENCES); this includes combining an options bit with an explicit BreakIterator.
  42. *
  43. * @see U_TITLECASE_ADJUST_TO_CASED
  44. * @stable ICU 60
  45. */
  46. #define U_TITLECASE_WHOLE_STRING 0x20
  47. /**
  48. * Titlecase sentences rather than words.
  49. * (Titlecase only the first character of each sentence, possibly adjusted.)
  50. * Option bits value for titlecasing APIs that take an options bit set.
  51. *
  52. * It is an error to specify multiple titlecasing iterator options together
  53. * (e.g., this one with U_TITLECASE_WHOLE_STRING); this includes combining an options bit with an explicit BreakIterator.
  54. *
  55. * @see U_TITLECASE_ADJUST_TO_CASED
  56. * @stable ICU 60
  57. */
  58. #define U_TITLECASE_SENTENCES 0x40
  59. /**
  60. * Do not lowercase non-initial parts of words when titlecasing.
  61. * Option bit for titlecasing APIs that take an options bit set.
  62. *
  63. * By default, titlecasing will titlecase the character at each
  64. * (possibly adjusted) BreakIterator index and lowercase all other characters
  65. * up to the next iterator index. With this option, the other characters are left unmodified.
  66. *
  67. * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
  68. * @see U_TITLECASE_ADJUST_TO_CASED
  69. * @see UnicodeString::toTitle
  70. * @see CaseMap::toTitle
  71. * @see ucasemap_setOptions
  72. * @see ucasemap_toTitle
  73. * @see ucasemap_utf8ToTitle
  74. * @stable ICU 3.8
  75. */
  76. #define U_TITLECASE_NO_LOWERCASE 0x100
  77. /**
  78. * Do not adjust the titlecasing BreakIterator indexes;
  79. * titlecase exactly the characters at breaks from the iterator.
  80. * Option bit for titlecasing APIs that take an options bit set.
  81. *
  82. * By default, titlecasing will take each break iterator index,
  83. * adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED),
  84. * and titlecase that one.
  85. *
  86. * Other characters are lowercased (see U_TITLECASE_NO_LOWERCASE for leaving them unmodified).
  87. *
  88. * It is an error to specify multiple titlecasing adjustment options together (e.g., this one with U_TITLECASE_ADJUST_TO_CASED).
  89. *
  90. * @see U_TITLECASE_ADJUST_TO_CASED
  91. * @see U_TITLECASE_NO_LOWERCASE
  92. * @see UnicodeString::toTitle
  93. * @see CaseMap::toTitle
  94. * @see ucasemap_setOptions
  95. * @see ucasemap_toTitle
  96. * @see ucasemap_utf8ToTitle
  97. * @stable ICU 3.8
  98. */
  99. #define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
  100. /**
  101. * Adjust each titlecasing BreakIterator index to the next cased character.
  102. * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
  103. * Option bit for titlecasing APIs that take an options bit set.
  104. *
  105. * Before ICU 60, this was the default index adjustment in ICU.
  106. * Since ICU 60, the default index adjustment is to the next character that is
  107. * a letter, number, symbol, or private use code point.
  108. * (Uncased modifier letters are skipped.)
  109. * The difference in behavior is small for word titlecasing,
  110. * but the new adjustment is much better for whole-string and sentence titlecasing:
  111. * It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
  112. *
  113. * It is an error to specify multiple titlecasing adjustment options together (e.g., this one with U_TITLECASE_NO_BREAK_ADJUSTMENT).
  114. *
  115. * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
  116. * @stable ICU 60
  117. */
  118. #define U_TITLECASE_ADJUST_TO_CASED 0x400
  119. /**
  120. * Option for string transformation functions: Do not reset the Edits object first.
  121. * Used, for example, in some case-mapping and normalization functions.
  122. *
  123. * @see CaseMap
  124. * @see Edits
  125. * @see Normalizer2
  126. * @stable ICU 60
  127. */
  128. #define U_EDITS_NO_RESET 0x2000
  129. /**
  130. * Omit unchanged text when recording how source substrings
  131. * relate to changed and unchanged result substrings.
  132. * Used, for example, in some case-mapping and normalization functions.
  133. *
  134. * @see CaseMap
  135. * @see Edits
  136. * @see Normalizer2
  137. * @stable ICU 60
  138. */
  139. #define U_OMIT_UNCHANGED_TEXT 0x4000
  140. /**
  141. * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc.:
  142. * Compare strings in code point order instead of code unit order.
  143. * @stable ICU 2.2
  144. */
  145. #define U_COMPARE_CODE_POINT_ORDER 0x8000
  146. /**
  147. * Option bit for unorm_compare:
  148. * Perform a case-insensitive comparison.
  149. * @stable ICU 2.2
  150. */
  151. #define U_COMPARE_IGNORE_CASE 0x10000
  152. /**
  153. * Option bit for unorm_compare:
  154. * Both input strings are assumed to fulfill the FCD conditions.
  155. * @stable ICU 2.2
  156. */
  157. #define UNORM_INPUT_IS_FCD 0x20000
  158. // Related definitions elsewhere.
  159. // Options that are not meaningful in the same functions
  160. // can share the same bits.
  161. //
  162. // Public:
  163. // unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
  164. //
  165. // Internal: (may change or be removed)
  166. // ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
  167. // ucase.h #define _FOLD_CASE_OPTIONS_MASK 7
  168. // ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
  169. // ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
  170. // ustr_imp.h #define _STRNCMP_STYLE 0x1000
  171. // unormcmp.cpp #define _COMPARE_EQUIV 0x80000
  172. #endif // __STRINGOPTIONS_H__