rbt_data.h 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. **********************************************************************
  5. * Copyright (C) 1999-2007, International Business Machines Corporation
  6. * and others. All Rights Reserved.
  7. **********************************************************************
  8. * Date Name Description
  9. * 11/17/99 aliu Creation.
  10. **********************************************************************
  11. */
  12. #ifndef RBT_DATA_H
  13. #define RBT_DATA_H
  14. #include "unicode/utypes.h"
  15. #include "unicode/uclean.h"
  16. #if !UCONFIG_NO_TRANSLITERATION
  17. #include "unicode/uobject.h"
  18. #include "rbt_set.h"
  19. #include "hash.h"
  20. U_NAMESPACE_BEGIN
  21. class UnicodeFunctor;
  22. class UnicodeMatcher;
  23. class UnicodeReplacer;
  24. /**
  25. * The rule data for a RuleBasedTransliterators. RBT objects hold
  26. * a const pointer to a TRD object that they do not own. TRD objects
  27. * are essentially the parsed rules in compact, usable form. The
  28. * TRD objects themselves are held for the life of the process in
  29. * a static cache owned by Transliterator.
  30. *
  31. * This class' API is a little asymmetric. There is a method to
  32. * define a variable, but no way to define a set. This is because the
  33. * sets are defined by the parser in a UVector, and the vector is
  34. * copied into a fixed-size array here. Once this is done, no new
  35. * sets may be defined. In practice, there is no need to do so, since
  36. * generating the data and using it are discrete phases. When there
  37. * is a need to access the set data during the parse phase, another
  38. * data structure handles this. See the parsing code for more
  39. * details.
  40. */
  41. class TransliterationRuleData : public UMemory {
  42. public:
  43. // PUBLIC DATA MEMBERS
  44. /**
  45. * Rule table. May be empty.
  46. */
  47. TransliterationRuleSet ruleSet;
  48. /**
  49. * Map variable name (String) to variable (UnicodeString). A variable name
  50. * corresponds to zero or more characters, stored in a UnicodeString in
  51. * this hash. One or more of these chars may also correspond to a
  52. * UnicodeMatcher, in which case the character in the UnicodeString in this hash is
  53. * a stand-in: it is an index for a secondary lookup in
  54. * data.variables. The stand-in also represents the UnicodeMatcher in
  55. * the stored rules.
  56. */
  57. Hashtable variableNames;
  58. /**
  59. * Map category variable (char16_t) to set (UnicodeFunctor).
  60. * Variables that correspond to a set of characters are mapped
  61. * from variable name to a stand-in character in data.variableNames.
  62. * The stand-in then serves as a key in this hash to lookup the
  63. * actual UnicodeFunctor object. In addition, the stand-in is
  64. * stored in the rule text to represent the set of characters.
  65. * variables[i] represents character (variablesBase + i).
  66. */
  67. UnicodeFunctor** variables;
  68. /**
  69. * Flag that indicates whether the variables are owned (if a single
  70. * call to Transliterator::createFromRules() produces a CompoundTransliterator
  71. * with more than one RuleBasedTransliterator as children, they all share
  72. * the same variables list, so only the first one is considered to own
  73. * the variables)
  74. */
  75. UBool variablesAreOwned;
  76. /**
  77. * The character that represents variables[0]. Characters
  78. * variablesBase through variablesBase +
  79. * variablesLength - 1 represent UnicodeFunctor objects.
  80. */
  81. char16_t variablesBase;
  82. /**
  83. * The length of variables.
  84. */
  85. int32_t variablesLength;
  86. public:
  87. /**
  88. * Constructor
  89. * @param status Output param set to success/failure code on exit.
  90. */
  91. TransliterationRuleData(UErrorCode& status);
  92. /**
  93. * Copy Constructor
  94. */
  95. TransliterationRuleData(const TransliterationRuleData&);
  96. /**
  97. * destructor
  98. */
  99. ~TransliterationRuleData();
  100. /**
  101. * Given a stand-in character, return the UnicodeFunctor that it
  102. * represents, or nullptr if it doesn't represent anything.
  103. * @param standIn the given stand-in character.
  104. * @return the UnicodeFunctor that 'standIn' represents
  105. */
  106. UnicodeFunctor* lookup(UChar32 standIn) const;
  107. /**
  108. * Given a stand-in character, return the UnicodeMatcher that it
  109. * represents, or nullptr if it doesn't represent anything or if it
  110. * represents something that is not a matcher.
  111. * @param standIn the given stand-in character.
  112. * @return return the UnicodeMatcher that 'standIn' represents
  113. */
  114. UnicodeMatcher* lookupMatcher(UChar32 standIn) const;
  115. /**
  116. * Given a stand-in character, return the UnicodeReplacer that it
  117. * represents, or nullptr if it doesn't represent anything or if it
  118. * represents something that is not a replacer.
  119. * @param standIn the given stand-in character.
  120. * @return return the UnicodeReplacer that 'standIn' represents
  121. */
  122. UnicodeReplacer* lookupReplacer(UChar32 standIn) const;
  123. private:
  124. TransliterationRuleData &operator=(const TransliterationRuleData &other); // forbid copying of this class
  125. };
  126. U_NAMESPACE_END
  127. #endif /* #if !UCONFIG_NO_TRANSLITERATION */
  128. #endif