unimatch.h 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. * Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved.
  5. **********************************************************************
  6. * Date Name Description
  7. * 07/18/01 aliu Creation.
  8. **********************************************************************
  9. */
  10. #ifndef UNIMATCH_H
  11. #define UNIMATCH_H
  12. #include "unicode/utypes.h"
  13. /**
  14. * \file
  15. * \brief C++ API: Unicode Matcher
  16. */
  17. #if U_SHOW_CPLUSPLUS_API
  18. U_NAMESPACE_BEGIN
  19. class Replaceable;
  20. class UnicodeString;
  21. class UnicodeSet;
  22. /**
  23. * Constants returned by <code>UnicodeMatcher::matches()</code>
  24. * indicating the degree of match.
  25. * @stable ICU 2.4
  26. */
  27. enum UMatchDegree {
  28. /**
  29. * Constant returned by <code>matches()</code> indicating a
  30. * mismatch between the text and this matcher. The text contains
  31. * a character which does not match, or the text does not contain
  32. * all desired characters for a non-incremental match.
  33. * @stable ICU 2.4
  34. */
  35. U_MISMATCH,
  36. /**
  37. * Constant returned by <code>matches()</code> indicating a
  38. * partial match between the text and this matcher. This value is
  39. * only returned for incremental match operations. All characters
  40. * of the text match, but more characters are required for a
  41. * complete match. Alternatively, for variable-length matchers,
  42. * all characters of the text match, and if more characters were
  43. * supplied at limit, they might also match.
  44. * @stable ICU 2.4
  45. */
  46. U_PARTIAL_MATCH,
  47. /**
  48. * Constant returned by <code>matches()</code> indicating a
  49. * complete match between the text and this matcher. For an
  50. * incremental variable-length match, this value is returned if
  51. * the given text matches, and it is known that additional
  52. * characters would not alter the extent of the match.
  53. * @stable ICU 2.4
  54. */
  55. U_MATCH
  56. };
  57. /**
  58. * <code>UnicodeMatcher</code> defines a protocol for objects that can
  59. * match a range of characters in a Replaceable string.
  60. * @stable ICU 2.4
  61. */
  62. class U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ {
  63. public:
  64. /**
  65. * Destructor.
  66. * @stable ICU 2.4
  67. */
  68. virtual ~UnicodeMatcher();
  69. /**
  70. * Return a UMatchDegree value indicating the degree of match for
  71. * the given text at the given offset. Zero, one, or more
  72. * characters may be matched.
  73. *
  74. * Matching in the forward direction is indicated by limit >
  75. * offset. Characters from offset forwards to limit-1 will be
  76. * considered for matching.
  77. *
  78. * Matching in the reverse direction is indicated by limit <
  79. * offset. Characters from offset backwards to limit+1 will be
  80. * considered for matching.
  81. *
  82. * If limit == offset then the only match possible is a zero
  83. * character match (which subclasses may implement if desired).
  84. *
  85. * As a side effect, advance the offset parameter to the limit of
  86. * the matched substring. In the forward direction, this will be
  87. * the index of the last matched character plus one. In the
  88. * reverse direction, this will be the index of the last matched
  89. * character minus one.
  90. *
  91. * <p>Note: This method is not const because some classes may
  92. * modify their state as the result of a match.
  93. *
  94. * @param text the text to be matched
  95. * @param offset on input, the index into text at which to begin
  96. * matching. On output, the limit of the matched text. The
  97. * number of matched characters is the output value of offset
  98. * minus the input value. Offset should always point to the
  99. * HIGH SURROGATE (leading code unit) of a pair of surrogates,
  100. * both on entry and upon return.
  101. * @param limit the limit index of text to be matched. Greater
  102. * than offset for a forward direction match, less than offset for
  103. * a backward direction match. The last character to be
  104. * considered for matching will be text.charAt(limit-1) in the
  105. * forward direction or text.charAt(limit+1) in the backward
  106. * direction.
  107. * @param incremental if true, then assume further characters may
  108. * be inserted at limit and check for partial matching. Otherwise
  109. * assume the text as given is complete.
  110. * @return a match degree value indicating a full match, a partial
  111. * match, or a mismatch. If incremental is false then
  112. * U_PARTIAL_MATCH should never be returned.
  113. * @stable ICU 2.4
  114. */
  115. virtual UMatchDegree matches(const Replaceable& text,
  116. int32_t& offset,
  117. int32_t limit,
  118. UBool incremental) = 0;
  119. /**
  120. * Returns a string representation of this matcher. If the result of
  121. * calling this function is passed to the appropriate parser, it
  122. * will produce another matcher that is equal to this one.
  123. * @param result the string to receive the pattern. Previous
  124. * contents will be deleted.
  125. * @param escapeUnprintable if true then convert unprintable
  126. * character to their hex escape representations, \\uxxxx or
  127. * \\Uxxxxxxxx. Unprintable characters are those other than
  128. * U+000A, U+0020..U+007E.
  129. * @stable ICU 2.4
  130. */
  131. virtual UnicodeString& toPattern(UnicodeString& result,
  132. UBool escapeUnprintable = false) const = 0;
  133. /**
  134. * Returns true if this matcher will match a character c, where c
  135. * & 0xFF == v, at offset, in the forward direction (with limit >
  136. * offset). This is used by <tt>RuleBasedTransliterator</tt> for
  137. * indexing.
  138. * @stable ICU 2.4
  139. */
  140. virtual UBool matchesIndexValue(uint8_t v) const = 0;
  141. /**
  142. * Union the set of all characters that may be matched by this object
  143. * into the given set.
  144. * @param toUnionTo the set into which to union the source characters
  145. * @stable ICU 2.4
  146. */
  147. virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0;
  148. };
  149. U_NAMESPACE_END
  150. #endif /* U_SHOW_CPLUSPLUS_API */
  151. #endif