diff.h 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  1. #pragma once
  2. #include <library/cpp/lcs/lcs_via_lis.h>
  3. #include <util/generic/algorithm.h>
  4. #include <util/generic/array_ref.h>
  5. #include <util/generic/strbuf.h>
  6. #include <util/generic/vector.h>
  7. #include <util/stream/output.h>
  8. #include <util/string/split.h>
  9. namespace NDiff {
  10. template <typename T>
  11. struct TChunk {
  12. TConstArrayRef<T> Left;
  13. TConstArrayRef<T> Right;
  14. TConstArrayRef<T> Common;
  15. TChunk() = default;
  16. TChunk(const TConstArrayRef<T>& left, const TConstArrayRef<T>& right, const TConstArrayRef<T>& common)
  17. : Left(left)
  18. , Right(right)
  19. , Common(common)
  20. {
  21. }
  22. };
  23. template <typename T>
  24. size_t InlineDiff(TVector<TChunk<T>>& chunks, const TConstArrayRef<T>& left, const TConstArrayRef<T>& right) {
  25. TConstArrayRef<T> s1(left);
  26. TConstArrayRef<T> s2(right);
  27. bool swapped = false;
  28. if (s1.size() < s2.size()) {
  29. // NLCS will silently swap strings if second string is longer
  30. // So we swap strings here and remember the fact since it is crucial to diff
  31. DoSwap(s1, s2);
  32. swapped = true;
  33. }
  34. TVector<T> lcs;
  35. NLCS::TLCSCtx<T> ctx;
  36. NLCS::MakeLCS<T>(s1, s2, &lcs, &ctx);
  37. // Start points of current common and diff parts
  38. const T* c1 = nullptr;
  39. const T* c2 = nullptr;
  40. const T* d1 = s1.begin();
  41. const T* d2 = s2.begin();
  42. // End points of current common parts
  43. const T* e1 = s1.begin();
  44. const T* e2 = s2.begin();
  45. size_t dist = s1.size() - lcs.size();
  46. const size_t n = ctx.ResultBuffer.size();
  47. for (size_t i = 0; i <= n && (e1 != s1.end() || e2 != s2.end());) {
  48. if (i < n) {
  49. // Common character exists
  50. // LCS is marked against positions in s2
  51. // Save the beginning of common part in s2
  52. c2 = s2.begin() + ctx.ResultBuffer[i];
  53. // Find the beginning of common part in s1
  54. c1 = Find(e1, s1.end(), *c2);
  55. // Follow common substring
  56. for (e1 = c1, e2 = c2; i < n && *e1 == *e2; ++e1, ++e2) {
  57. ++i;
  58. }
  59. } else {
  60. // No common character, common part is empty
  61. c1 = s1.end();
  62. c2 = s2.end();
  63. e1 = s1.end();
  64. e2 = s2.end();
  65. }
  66. TChunk<T> chunk(TConstArrayRef<T>(d1, c1), TConstArrayRef<T>(d2, c2), TConstArrayRef<T>(c1, e1));
  67. if (swapped) {
  68. DoSwap(chunk.Left, chunk.Right);
  69. chunk.Common = TConstArrayRef<T>(c2, e2);
  70. }
  71. chunks.push_back(chunk);
  72. d1 = e1;
  73. d2 = e2;
  74. }
  75. return dist;
  76. }
  77. template <typename TFormatter, typename T>
  78. void PrintChunks(IOutputStream& out, const TFormatter& fmt, const TVector<TChunk<T>>& chunks) {
  79. for (typename TVector<TChunk<T>>::const_iterator chunk = chunks.begin(); chunk != chunks.end(); ++chunk) {
  80. if (!chunk->Left.empty() || !chunk->Right.empty()) {
  81. out << fmt.Special("(");
  82. out << fmt.Left(chunk->Left);
  83. out << fmt.Special("|");
  84. out << fmt.Right(chunk->Right);
  85. out << fmt.Special(")");
  86. }
  87. out << fmt.Common(chunk->Common);
  88. }
  89. }
  90. // Without delimiters calculates character-wise diff
  91. // With delimiters calculates token-wise diff
  92. size_t InlineDiff(TVector<TChunk<char>>& chunks, const TStringBuf& left, const TStringBuf& right, const TString& delims = TString());
  93. size_t InlineDiff(TVector<TChunk<wchar16>>& chunks, const TWtringBuf& left, const TWtringBuf& right, const TUtf16String& delims = TUtf16String());
  94. }