bytesinkutil.cpp 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. // © 2017 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. // bytesinkutil.cpp
  4. // created: 2017sep14 Markus W. Scherer
  5. #include "unicode/utypes.h"
  6. #include "unicode/bytestream.h"
  7. #include "unicode/edits.h"
  8. #include "unicode/stringoptions.h"
  9. #include "unicode/utf8.h"
  10. #include "unicode/utf16.h"
  11. #include "bytesinkutil.h"
  12. #include "charstr.h"
  13. #include "cmemory.h"
  14. #include "uassert.h"
  15. U_NAMESPACE_BEGIN
  16. UBool
  17. ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Length,
  18. ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
  19. if (U_FAILURE(errorCode)) { return false; }
  20. char scratch[200];
  21. int32_t s8Length = 0;
  22. for (int32_t i = 0; i < s16Length;) {
  23. int32_t capacity;
  24. int32_t desiredCapacity = s16Length - i;
  25. if (desiredCapacity < (INT32_MAX / 3)) {
  26. desiredCapacity *= 3; // max 3 UTF-8 bytes per UTF-16 code unit
  27. } else if (desiredCapacity < (INT32_MAX / 2)) {
  28. desiredCapacity *= 2;
  29. } else {
  30. desiredCapacity = INT32_MAX;
  31. }
  32. char *buffer = sink.GetAppendBuffer(U8_MAX_LENGTH, desiredCapacity,
  33. scratch, UPRV_LENGTHOF(scratch), &capacity);
  34. capacity -= U8_MAX_LENGTH - 1;
  35. int32_t j = 0;
  36. for (; i < s16Length && j < capacity;) {
  37. UChar32 c;
  38. U16_NEXT_UNSAFE(s16, i, c);
  39. U8_APPEND_UNSAFE(buffer, j, c);
  40. }
  41. if (j > (INT32_MAX - s8Length)) {
  42. errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
  43. return false;
  44. }
  45. sink.Append(buffer, j);
  46. s8Length += j;
  47. }
  48. if (edits != nullptr) {
  49. edits->addReplace(length, s8Length);
  50. }
  51. return true;
  52. }
  53. UBool
  54. ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit,
  55. const char16_t *s16, int32_t s16Length,
  56. ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
  57. if (U_FAILURE(errorCode)) { return false; }
  58. if ((limit - s) > INT32_MAX) {
  59. errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
  60. return false;
  61. }
  62. return appendChange(static_cast<int32_t>(limit - s), s16, s16Length, sink, edits, errorCode);
  63. }
  64. void
  65. ByteSinkUtil::appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits) {
  66. char s8[U8_MAX_LENGTH];
  67. int32_t s8Length = 0;
  68. U8_APPEND_UNSAFE(s8, s8Length, c);
  69. if (edits != nullptr) {
  70. edits->addReplace(length, s8Length);
  71. }
  72. sink.Append(s8, s8Length);
  73. }
  74. namespace {
  75. // See unicode/utf8.h U8_APPEND_UNSAFE().
  76. inline uint8_t getTwoByteLead(UChar32 c) { return static_cast<uint8_t>((c >> 6) | 0xc0); }
  77. inline uint8_t getTwoByteTrail(UChar32 c) { return static_cast<uint8_t>((c & 0x3f) | 0x80); }
  78. } // namespace
  79. void
  80. ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) {
  81. U_ASSERT(0x80 <= c && c <= 0x7ff); // 2-byte UTF-8
  82. char s8[2] = {static_cast<char>(getTwoByteLead(c)), static_cast<char>(getTwoByteTrail(c))};
  83. sink.Append(s8, 2);
  84. }
  85. void
  86. ByteSinkUtil::appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
  87. ByteSink &sink, uint32_t options, Edits *edits) {
  88. U_ASSERT(length > 0);
  89. if (edits != nullptr) {
  90. edits->addUnchanged(length);
  91. }
  92. if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
  93. sink.Append(reinterpret_cast<const char *>(s), length);
  94. }
  95. }
  96. UBool
  97. ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
  98. ByteSink &sink, uint32_t options, Edits *edits,
  99. UErrorCode &errorCode) {
  100. if (U_FAILURE(errorCode)) { return false; }
  101. if ((limit - s) > INT32_MAX) {
  102. errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
  103. return false;
  104. }
  105. int32_t length = static_cast<int32_t>(limit - s);
  106. if (length > 0) {
  107. appendNonEmptyUnchanged(s, length, sink, options, edits);
  108. }
  109. return true;
  110. }
  111. CharStringByteSink::CharStringByteSink(CharString* dest) : dest_(*dest) {
  112. }
  113. CharStringByteSink::~CharStringByteSink() = default;
  114. void
  115. CharStringByteSink::Append(const char* bytes, int32_t n) {
  116. UErrorCode status = U_ZERO_ERROR;
  117. dest_.append(bytes, n, status);
  118. // Any errors are silently ignored.
  119. }
  120. char*
  121. CharStringByteSink::GetAppendBuffer(int32_t min_capacity,
  122. int32_t desired_capacity_hint,
  123. char* scratch,
  124. int32_t scratch_capacity,
  125. int32_t* result_capacity) {
  126. if (min_capacity < 1 || scratch_capacity < min_capacity) {
  127. *result_capacity = 0;
  128. return nullptr;
  129. }
  130. UErrorCode status = U_ZERO_ERROR;
  131. char* result = dest_.getAppendBuffer(
  132. min_capacity,
  133. desired_capacity_hint,
  134. *result_capacity,
  135. status);
  136. if (U_SUCCESS(status)) {
  137. return result;
  138. }
  139. *result_capacity = scratch_capacity;
  140. return scratch;
  141. }
  142. U_NAMESPACE_END