bytesinkutil.h 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. // © 2017 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. // bytesinkutil.h
  4. // created: 2017sep14 Markus W. Scherer
  5. #ifndef BYTESINKUTIL_H
  6. #define BYTESINKUTIL_H
  7. #include <type_traits>
  8. #include "unicode/utypes.h"
  9. #include "unicode/bytestream.h"
  10. #include "unicode/edits.h"
  11. #include "charstr.h"
  12. #include "cmemory.h"
  13. #include "uassert.h"
  14. #include "ustr_imp.h"
  15. U_NAMESPACE_BEGIN
  // Forward declarations of the sink and edit-recording classes named in the
  // declarations below.
  16. class ByteSink;
  17. class Edits;
  18. class U_COMMON_API CharStringByteSink : public ByteSink {
  19. public:
  20. CharStringByteSink(CharString* dest);
  21. ~CharStringByteSink() override;
  22. CharStringByteSink() = delete;
  23. CharStringByteSink(const CharStringByteSink&) = delete;
  24. CharStringByteSink& operator=(const CharStringByteSink&) = delete;
  25. void Append(const char* bytes, int32_t n) override;
  26. char* GetAppendBuffer(int32_t min_capacity,
  27. int32_t desired_capacity_hint,
  28. char* scratch,
  29. int32_t scratch_capacity,
  30. int32_t* result_capacity) override;
  31. private:
  32. CharString& dest_;
  33. };
  34. // CharString doesn't provide the public API that StringByteSink requires a
  35. // string class to have so this template specialization replaces the default
  36. // implementation of StringByteSink<CharString> with CharStringByteSink.
  37. template<>
  38. class StringByteSink<CharString> : public CharStringByteSink {
  39. public:
  40. StringByteSink(CharString* dest) : CharStringByteSink(dest) { }
  41. StringByteSink(CharString* dest, int32_t /*initialAppendCapacity*/) : CharStringByteSink(dest) { }
  42. };
  43. class U_COMMON_API ByteSinkUtil {
  44. public:
  45. ByteSinkUtil() = delete; // all static
  46. /** (length) bytes were mapped to valid (s16, s16Length). */
  47. static UBool appendChange(int32_t length,
  48. const char16_t *s16, int32_t s16Length,
  49. ByteSink &sink, Edits *edits, UErrorCode &errorCode);
  50. /** The bytes at [s, limit[ were mapped to valid (s16, s16Length). */
  51. static UBool appendChange(const uint8_t *s, const uint8_t *limit,
  52. const char16_t *s16, int32_t s16Length,
  53. ByteSink &sink, Edits *edits, UErrorCode &errorCode);
  54. /** (length) bytes were mapped/changed to valid code point c. */
  55. static void appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits = nullptr);
  56. /** The few bytes at [src, nextSrc[ were mapped/changed to valid code point c. */
  57. static inline void appendCodePoint(const uint8_t *src, const uint8_t *nextSrc, UChar32 c,
  58. ByteSink &sink, Edits *edits = nullptr) {
  59. appendCodePoint(static_cast<int32_t>(nextSrc - src), c, sink, edits);
  60. }
  61. /** Append the two-byte character (U+0080..U+07FF). */
  62. static void appendTwoBytes(UChar32 c, ByteSink &sink);
  63. static UBool appendUnchanged(const uint8_t *s, int32_t length,
  64. ByteSink &sink, uint32_t options, Edits *edits,
  65. UErrorCode &errorCode) {
  66. if (U_FAILURE(errorCode)) { return false; }
  67. if (length > 0) { appendNonEmptyUnchanged(s, length, sink, options, edits); }
  68. return true;
  69. }
  70. static UBool appendUnchanged(const uint8_t *s, const uint8_t *limit,
  71. ByteSink &sink, uint32_t options, Edits *edits,
  72. UErrorCode &errorCode);
  73. /**
  74. * Calls a lambda that writes to a ByteSink with a CheckedArrayByteSink
  75. * and then returns through u_terminateChars(), in order to implement
  76. * the classic ICU4C C API writing to a fix sized buffer on top of a
  77. * contemporary C++ API.
  78. *
  79. * @param buffer receiving buffer
  80. * @param capacity capacity of receiving buffer
  81. * @param lambda that gets called with the sink as an argument
  82. * @param status set to U_BUFFER_OVERFLOW_ERROR on overflow
  83. * @return number of bytes written, or needed (in case of overflow)
  84. * @internal
  85. */
  86. template <typename F,
  87. typename = std::enable_if_t<
  88. std::is_invocable_r_v<void, F, ByteSink&, UErrorCode&>>>
  89. static int32_t viaByteSinkToTerminatedChars(char* buffer, int32_t capacity,
  90. F&& lambda,
  91. UErrorCode& status) {
  92. if (U_FAILURE(status)) { return 0; }
  93. CheckedArrayByteSink sink(buffer, capacity);
  94. lambda(sink, status);
  95. if (U_FAILURE(status)) { return 0; }
  96. int32_t reslen = sink.NumberOfBytesAppended();
  97. if (sink.Overflowed()) {
  98. status = U_BUFFER_OVERFLOW_ERROR;
  99. return reslen;
  100. }
  101. return u_terminateChars(buffer, capacity, reslen, &status);
  102. }
  103. /**
  104. * Calls a lambda that writes to a ByteSink with a CharStringByteSink and
  105. * then returns a CharString, in order to implement a contemporary C++ API
  106. * on top of a C/C++ compatibility ByteSink API.
  107. *
  108. * @param lambda that gets called with the sink as an argument
  109. * @param status to check and report
  110. * @return the resulting string, or an empty string (in case of error)
  111. * @internal
  112. */
  113. template <typename F,
  114. typename = std::enable_if_t<
  115. std::is_invocable_r_v<void, F, ByteSink&, UErrorCode&>>>
  116. static CharString viaByteSinkToCharString(F&& lambda, UErrorCode& status) {
  117. if (U_FAILURE(status)) { return {}; }
  118. CharString result;
  119. CharStringByteSink sink(&result);
  120. lambda(sink, status);
  121. return result;
  122. }
  123. private:
  124. static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
  125. ByteSink &sink, uint32_t options, Edits *edits);
  126. };
  127. U_NAMESPACE_END
  128. #endif //BYTESINKUTIL_H