mkql_string_util.cpp 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. #include "mkql_string_util.h"
  2. namespace NKikimr {
  3. namespace NMiniKQL {
  4. namespace {
  5. ui32 CheckedSum(ui32 one, ui32 two) {
  6. if (ui64(one) + ui64(two) > ui64(std::numeric_limits<ui32>::max()))
  7. ythrow yexception() << "Impossible to concat too large strings " << one << " and " << two << " bytes!";
  8. return one + two;
  9. }
  10. }
  11. NUdf::TUnboxedValuePod AppendString(const NUdf::TUnboxedValuePod value, const NUdf::TStringRef ref)
  12. {
  13. if (!ref.Size())
  14. return value;
  15. const auto& valueRef = value.AsStringRef();
  16. if (!valueRef.Size())
  17. return MakeString(ref);
  18. const auto newSize = CheckedSum(valueRef.Size(), ref.Size());
  19. if (newSize <= NUdf::TUnboxedValuePod::InternalBufferSize) {
  20. auto result = NUdf::TUnboxedValuePod::Embedded(newSize);
  21. const auto buf = result.AsStringRef().Data();
  22. std::memcpy(buf, valueRef.Data(), valueRef.Size());
  23. std::memcpy(buf + valueRef.Size(), ref.Data(), ref.Size());
  24. return result;
  25. } else {
  26. if (value.IsString()) {
  27. auto str = value.AsStringValue();
  28. const ui32 offset = ref.Data() - str.Data();
  29. if (str.Size() == valueRef.Size() + offset) {
  30. if (str.TryExpandOn(ref.Size())) {
  31. std::memcpy(str.Data() + offset + valueRef.Size(), ref.Data(), ref.Size());
  32. return NUdf::TUnboxedValuePod(std::move(str), newSize, offset);
  33. }
  34. }
  35. }
  36. auto data = NUdf::TStringValue::AllocateData(newSize, newSize + newSize / 2);
  37. NUdf::TStringValue str(data);
  38. data->UnRef();
  39. std::memcpy(str.Data(), valueRef.Data(), valueRef.Size());
  40. std::memcpy(str.Data() + valueRef.Size(), ref.Data(), ref.Size());
  41. return NUdf::TUnboxedValuePod(std::move(str));
  42. }
  43. }
  44. NUdf::TUnboxedValuePod PrependString(const NUdf::TStringRef ref, const NUdf::TUnboxedValuePod value)
  45. {
  46. if (!ref.Size())
  47. return value;
  48. const auto& valueRef = value.AsStringRef();
  49. if (!valueRef.Size())
  50. return MakeString(ref);
  51. const auto newSize = CheckedSum(valueRef.Size(), ref.Size());
  52. if (newSize <= NUdf::TUnboxedValuePod::InternalBufferSize) {
  53. auto result = NUdf::TUnboxedValuePod::Embedded(newSize);
  54. const auto buf = result.AsStringRef().Data();
  55. std::memcpy(buf, ref.Data(), ref.Size());
  56. std::memcpy(buf + ref.Size(), valueRef.Data(), valueRef.Size());
  57. return result;
  58. } else {
  59. auto data = NUdf::TStringValue::AllocateData(newSize, newSize + newSize / 2);
  60. NUdf::TStringValue str(data);
  61. data->UnRef();
  62. std::memcpy(str.Data(), ref.Data(), ref.Size());
  63. std::memcpy(str.Data() + ref.Size(), valueRef.Data(), valueRef.Size());
  64. value.DeleteUnreferenced();
  65. return NUdf::TUnboxedValuePod(std::move(str));
  66. }
  67. }
  68. NUdf::TUnboxedValuePod ConcatStrings(const NUdf::TUnboxedValuePod first, const NUdf::TUnboxedValuePod second)
  69. {
  70. const auto& leftRef = first.AsStringRef();
  71. if (!leftRef.Size())
  72. return second;
  73. const auto& rightRef = second.AsStringRef();
  74. if (!rightRef.Size())
  75. return first;
  76. const auto newSize = CheckedSum(leftRef.Size(), rightRef.Size());
  77. if (newSize <= NUdf::TUnboxedValuePod::InternalBufferSize) {
  78. auto result = NUdf::TUnboxedValuePod::Embedded(newSize);
  79. const auto buf = result.AsStringRef().Data();
  80. std::memcpy(buf, leftRef.Data(), leftRef.Size());
  81. std::memcpy(buf + leftRef.Size(), rightRef.Data(), rightRef.Size());
  82. return result;
  83. } else {
  84. if (first.IsString()) {
  85. auto str = first.AsStringValue();
  86. const ui32 offset = leftRef.Data() - str.Data();
  87. if (str.Size() == leftRef.Size() + offset) {
  88. if (str.TryExpandOn(rightRef.Size())) {
  89. std::memcpy(str.Data() + offset + leftRef.Size(), rightRef.Data(), rightRef.Size());
  90. second.DeleteUnreferenced();
  91. return NUdf::TUnboxedValuePod(std::move(str), newSize, offset);
  92. }
  93. }
  94. }
  95. auto data = NUdf::TStringValue::AllocateData(newSize, newSize + newSize / 2);
  96. NUdf::TStringValue str(data);
  97. data->UnRef();
  98. std::memcpy(str.Data(), leftRef.Data(), leftRef.Size());
  99. std::memcpy(str.Data() + leftRef.Size(), rightRef.Data(), rightRef.Size());
  100. second.DeleteUnreferenced();
  101. return NUdf::TUnboxedValuePod(std::move(str));
  102. }
  103. }
  104. NUdf::TUnboxedValuePod SubString(const NUdf::TUnboxedValuePod value, ui32 offset, ui32 size)
  105. {
  106. const auto& ref = value.AsStringRef();
  107. if (size == 0U || ref.Size() <= offset) {
  108. value.DeleteUnreferenced();
  109. return NUdf::TUnboxedValuePod::Zero();
  110. }
  111. if (offset == 0U && ref.Size() <= size)
  112. return value;
  113. if (const auto newSize = std::min(ref.Size() - offset, size); newSize <= NUdf::TUnboxedValuePod::InternalBufferSize) {
  114. auto result = NUdf::TUnboxedValuePod::Embedded(newSize);
  115. std::memcpy(result.AsStringRef().Data(), ref.Data() + offset, newSize);
  116. value.DeleteUnreferenced();
  117. return result;
  118. } else {
  119. auto old = value.AsStringValue();
  120. if (const auto newOffset = ui32(ref.Data() - old.Data()) + offset; NUdf::TUnboxedValuePod::OffsetLimit > newOffset)
  121. return NUdf::TUnboxedValuePod(std::move(old), newSize, newOffset);
  122. auto data = NUdf::TStringValue::AllocateData(newSize, newSize + (newSize >> 1U));
  123. NUdf::TStringValue str(data);
  124. data->UnRef();
  125. std::memcpy(str.Data(), ref.Data() + offset, newSize);
  126. return NUdf::TUnboxedValuePod(std::move(str));
  127. }
  128. }
  129. NUdf::TUnboxedValuePod MakeString(const NUdf::TStringRef ref)
  130. {
  131. if (ref.Size() <= NUdf::TUnboxedValuePod::InternalBufferSize)
  132. return NUdf::TUnboxedValuePod::Embedded(ref);
  133. NUdf::TStringValue str(ref.Size());
  134. std::memcpy(str.Data(), ref.Data(), ref.Size());
  135. return NUdf::TUnboxedValuePod(std::move(str));
  136. }
  137. NUdf::TUnboxedValuePod MakeStringNotFilled(ui32 size, ui32 pad)
  138. {
  139. const auto fullSize = size + pad;
  140. if (fullSize <= NUdf::TUnboxedValuePod::InternalBufferSize)
  141. return NUdf::TUnboxedValuePod::Embedded(size);
  142. return NUdf::TUnboxedValuePod(NUdf::TStringValue(fullSize), size);
  143. }
  144. }
  145. }