diff.cpp 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. #include "diff.h"
  2. #include <util/generic/hash.h>
  3. #include <util/digest/fnv.h>
  4. #include <iterator>
  5. template <typename T>
  6. struct TCollectionImpl {
  7. TVector<TConstArrayRef<T>> Words;
  8. TVector<ui64> Keys;
  9. inline bool Consume(const T* b, const T* e, const T*) {
  10. if (b < e) {
  11. Words.push_back(TConstArrayRef<T>(b, e));
  12. Keys.push_back(FnvHash<ui64>((const char*)b, (e - b) * sizeof(T)));
  13. }
  14. return true;
  15. }
  16. TConstArrayRef<T> Remap(const TConstArrayRef<ui64>& keys) const {
  17. if (keys.empty()) {
  18. return TConstArrayRef<T>();
  19. }
  20. auto firstWordPos = std::distance(Keys.data(), keys.begin());
  21. auto lastWordPos = std::distance(Keys.data(), keys.end()) - 1;
  22. Y_ASSERT(firstWordPos >= 0);
  23. Y_ASSERT(lastWordPos >= firstWordPos);
  24. Y_ASSERT(static_cast<size_t>(lastWordPos) < Words.size());
  25. return TConstArrayRef<T>(Words[firstWordPos].begin(), Words[lastWordPos].end());
  26. }
  27. TConstArrayRef<ui64> GetKeys() const {
  28. return TConstArrayRef<ui64>(Keys);
  29. }
  30. };
  31. template <typename T>
  32. struct TCollection {
  33. };
  34. template <>
  35. struct TCollection<char>: public TCollectionImpl<char> {
  36. TCollection(const TStringBuf& str, const TString& delims) {
  37. TSetDelimiter<const char> set(delims.data());
  38. TKeepDelimiters<TCollection<char>> c(this);
  39. SplitString(str.begin(), str.end(), set, c);
  40. }
  41. };
  42. template <>
  43. struct TCollection<wchar16>: public TCollectionImpl<wchar16> {
  44. TCollection(const TWtringBuf& str, const TUtf16String& delims) {
  45. TSetDelimiter<const wchar16> set(delims.data());
  46. TKeepDelimiters<TCollection<wchar16>> c(this);
  47. SplitString(str.begin(), str.end(), set, c);
  48. }
  49. };
  50. size_t NDiff::InlineDiff(TVector<TChunk<char>>& chunks, const TStringBuf& left, const TStringBuf& right, const TString& delims) {
  51. if (delims.empty()) {
  52. return InlineDiff<char>(chunks, TConstArrayRef<char>(left.data(), left.size()), TConstArrayRef<char>(right.data(), right.size()));
  53. }
  54. TCollection<char> c1(left, delims);
  55. TCollection<char> c2(right, delims);
  56. TVector<TChunk<ui64>> diff;
  57. const size_t dist = InlineDiff<ui64>(diff, c1.GetKeys(), c2.GetKeys());
  58. for (const auto& it : diff) {
  59. chunks.push_back(TChunk<char>(c1.Remap(it.Left), c2.Remap(it.Right), c1.Remap(it.Common)));
  60. }
  61. return dist;
  62. }
  63. size_t NDiff::InlineDiff(TVector<TChunk<wchar16>>& chunks, const TWtringBuf& left, const TWtringBuf& right, const TUtf16String& delims) {
  64. if (delims.empty()) {
  65. return InlineDiff<wchar16>(chunks, TConstArrayRef<wchar16>(left.data(), left.size()), TConstArrayRef<wchar16>(right.data(), right.size()));
  66. }
  67. TCollection<wchar16> c1(left, delims);
  68. TCollection<wchar16> c2(right, delims);
  69. TVector<TChunk<ui64>> diff;
  70. const size_t dist = InlineDiff<ui64>(diff, c1.GetKeys(), c2.GetKeys());
  71. for (const auto& it : diff) {
  72. chunks.push_back(TChunk<wchar16>(c1.Remap(it.Left), c2.Remap(it.Right), c1.Remap(it.Common)));
  73. }
  74. return dist;
  75. }