rbt_set.h 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. **********************************************************************
  5. * Copyright (C) 1999-2007, International Business Machines Corporation
  6. * and others. All Rights Reserved.
  7. **********************************************************************
  8. * Date Name Description
  9. * 11/17/99 aliu Creation.
  10. **********************************************************************
  11. */
  12. #ifndef RBT_SET_H
  13. #define RBT_SET_H
  14. #include "unicode/utypes.h"
  15. #if !UCONFIG_NO_TRANSLITERATION
  16. #include "unicode/uobject.h"
  17. #include "unicode/utrans.h"
  18. #include "uvector.h"
  19. U_NAMESPACE_BEGIN
  20. class Replaceable;
  21. class TransliterationRule;
  22. class TransliterationRuleData;
  23. class UnicodeFilter;
  24. class UnicodeString;
  25. class UnicodeSet;
  26. /**
  27. * A set of rules for a <code>RuleBasedTransliterator</code>.
  28. * @author Alan Liu
  29. */
  30. class TransliterationRuleSet : public UMemory {
  31. /**
  32. * Vector of rules, in the order added. This is used while the
  33. * rule set is getting built. After that, freeze() reorders and
  34. * indexes the rules into rules[]. Any given rule is stored once
  35. * in ruleVector, and one or more times in rules[]. ruleVector
  36. * owns and deletes the rules.
  37. */
  38. UVector* ruleVector;
  39. /**
  40. * Sorted and indexed table of rules. This is created by freeze()
  41. * from the rules in ruleVector. It contains alias pointers to
  42. * the rules in ruleVector. It is zero before freeze() is called
  43. * and non-zero thereafter.
  44. */
  45. TransliterationRule** rules;
  46. /**
  47. * Index table. For text having a first character c, compute x = c&0xFF.
  48. * Now use rules[index[x]..index[x+1]-1]. This index table is created by
  49. * freeze(). Before freeze() is called it contains garbage.
  50. */
  51. int32_t index[257];
  52. /**
  53. * Length of the longest preceding context.
  54. */
  55. int32_t maxContextLength;
  56. public:
  57. /**
  58. * Construct a new empty rule set.
  59. * @param status Output parameter filled in with success or failure status.
  60. */
  61. TransliterationRuleSet(UErrorCode& status);
  62. /**
  63. * Copy constructor.
  64. */
  65. TransliterationRuleSet(const TransliterationRuleSet&);
  66. /**
  67. * Destructor.
  68. */
  69. virtual ~TransliterationRuleSet();
  70. /**
  71. * Change the data object that this rule belongs to. Used
  72. * internally by the TransliterationRuleData copy constructor.
  73. * @param data the new data value to be set.
  74. */
  75. void setData(const TransliterationRuleData* data);
  76. /**
  77. * Return the maximum context length.
  78. * @return the length of the longest preceding context.
  79. */
  80. virtual int32_t getMaximumContextLength() const;
  81. /**
  82. * Add a rule to this set. Rules are added in order, and order is
  83. * significant. The last call to this method must be followed by
  84. * a call to <code>freeze()</code> before the rule set is used.
  85. * This method must <em>not</em> be called after freeze() has been
  86. * called. NOTE(review): the freeze() comment below says addRule() may be called again provided freeze() is re-run afterwards -- confirm which contract holds.
  87. * @param adoptedRule the rule to add (presumably adopted by ruleVector, which owns and deletes the rules -- confirm)
  88. * @param status Output parameter filled in with success or failure status.
  89. */
  90. virtual void addRule(TransliterationRule* adoptedRule,
  91. UErrorCode& status);
  92. /**
  93. * Check this for masked rules and index it to optimize performance.
  94. * The sequence of operations is: (1) add rules to a set using
  95. * <code>addRule()</code>; (2) freeze the set using
  96. * <code>freeze()</code>; (3) use the rule set. If
  97. * <code>addRule()</code> is called after calling this method, it
  98. * invalidates this object, and this method must be called again.
  99. * That is, <code>freeze()</code> may be called multiple times,
  100. * although for optimal performance it shouldn't be.
  101. * @param parseError A reference to a UParseError that receives information about
  102. * any errors that occurred.
  103. * @param status Output parameter filled in with success or failure status.
  104. */
  105. virtual void freeze(UParseError& parseError, UErrorCode& status);
  106. /**
  107. * Transliterate the given text with the given UTransPosition
  108. * indices. Return true if the transliteration should continue
  109. * or false if it should halt (because of a U_PARTIAL_MATCH match).
  110. * Note that false is only ever returned if isIncremental is true.
  111. * @param text the text to be transliterated
  112. * @param index the position indices, which will be updated
  113. * @param isIncremental if true, assume new text may be inserted
  114. * at index.limit, and return false if there is a partial match.
  115. * @return true unless a U_PARTIAL_MATCH has been obtained,
  116. * indicating that transliteration should stop until more text
  117. * arrives.
  118. */
  119. UBool transliterate(Replaceable& text,
  120. UTransPosition& index,
  121. UBool isIncremental);
  122. /**
  123. * Create rule strings that represent this rule set.
  124. * @param result string to receive the rule strings. Current
  125. * contents will be deleted.
  126. * @param escapeUnprintable If true, unprintable characters are escaped.
  127. * @return A reference to 'result'.
  128. */
  129. virtual UnicodeString& toRules(UnicodeString& result,
  130. UBool escapeUnprintable) const;
  131. /**
  132. * Return the set of all characters that may be modified
  133. * (getTarget=false) or emitted (getTarget=true) by this set.
  134. */
  135. UnicodeSet& getSourceTargetSet(UnicodeSet& result,
  136. UBool getTarget) const;
  137. private:
  138. TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // declared private to forbid assignment; copy construction remains available through the public copy constructor
  139. };
  140. U_NAMESPACE_END
  141. #endif /* #if !UCONFIG_NO_TRANSLITERATION */
  142. #endif