uniquecharstr.h 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. // © 2020 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. // uniquecharstr.h
  4. // created: 2020sep01 Frank Yung-Fong Tang
  5. #ifndef __UNIQUECHARSTR_H__
  6. #define __UNIQUECHARSTR_H__
  7. #include "charstr.h"
  8. #include "uassert.h"
  9. #include "uhash.h"
  10. #include "cmemory.h"
  11. U_NAMESPACE_BEGIN
  12. /**
  13. * Stores NUL-terminated strings with duplicate elimination.
  14. * Checks for unique UTF-16 string pointers and converts to invariant characters.
  15. *
  16. * Intended to be stack-allocated. Add strings, get a unique number for each,
  17. * freeze the object, get a char * pointer for each string,
  18. * call orphanCharStrings() to capture the string storage, and let this object go out of scope.
  19. */
  20. class UniqueCharStrings {
  21. public:
  22. UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) {
  23. // Note: We hash on string contents but store stable char16_t * pointers.
  24. // If the strings are stored in resource bundles which should be built with
  25. // duplicate elimination, then we should be able to hash on just the pointer values.
  26. uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode);
  27. if (U_FAILURE(errorCode)) { return; }
  28. strings = new CharString();
  29. if (strings == nullptr) {
  30. errorCode = U_MEMORY_ALLOCATION_ERROR;
  31. }
  32. }
  33. ~UniqueCharStrings() {
  34. uhash_close(&map);
  35. delete strings;
  36. }
  37. /** Returns/orphans the CharString that contains all strings. */
  38. CharString *orphanCharStrings() {
  39. CharString *result = strings;
  40. strings = nullptr;
  41. return result;
  42. }
  43. /**
  44. * Adds a NUL-terminated string and returns a unique number for it.
  45. * The string must not change, nor move around in memory,
  46. * while this UniqueCharStrings is in use.
  47. *
  48. * Best used with string data in a stable storage, such as strings returned
  49. * by resource bundle functions.
  50. */
  51. int32_t add(const char16_t*p, UErrorCode &errorCode) {
  52. if (U_FAILURE(errorCode)) { return -1; }
  53. if (isFrozen) {
  54. errorCode = U_NO_WRITE_PERMISSION;
  55. return -1;
  56. }
  57. // The string points into the resource bundle.
  58. int32_t oldIndex = uhash_geti(&map, p);
  59. if (oldIndex != 0) { // found duplicate
  60. return oldIndex;
  61. }
  62. // Explicit NUL terminator for the previous string.
  63. // The strings object is also terminated with one implicit NUL.
  64. strings->append(0, errorCode);
  65. int32_t newIndex = strings->length();
  66. strings->appendInvariantChars(p, u_strlen(p), errorCode);
  67. uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode);
  68. return newIndex;
  69. }
  70. /**
  71. * Adds a unicode string by value and returns a unique number for it.
  72. */
  73. int32_t addByValue(UnicodeString s, UErrorCode &errorCode) {
  74. if (U_FAILURE(errorCode)) { return -1; }
  75. if (isFrozen) {
  76. errorCode = U_NO_WRITE_PERMISSION;
  77. return -1;
  78. }
  79. int32_t oldIndex = uhash_geti(&map, s.getTerminatedBuffer());
  80. if (oldIndex != 0) { // found duplicate
  81. return oldIndex;
  82. }
  83. // We need to store the string content of the UnicodeString.
  84. UnicodeString *key = keyStore.create(s);
  85. if (key == nullptr) {
  86. errorCode = U_MEMORY_ALLOCATION_ERROR;
  87. return -1;
  88. }
  89. return add(key->getTerminatedBuffer(), errorCode);
  90. }
  91. void freeze() { isFrozen = true; }
  92. /**
  93. * Returns a string pointer for its unique number, if this object is frozen.
  94. * Otherwise nullptr.
  95. */
  96. const char *get(int32_t i) const {
  97. U_ASSERT(isFrozen);
  98. return isFrozen && i > 0 ? strings->data() + i : nullptr;
  99. }
  100. private:
  101. UHashtable map;
  102. CharString *strings;
  103. MemoryPool<UnicodeString> keyStore;
  104. bool isFrozen = false;
  105. };
  106. U_NAMESPACE_END
  107. #endif // __UNIQUECHARSTR_H__