kmp.h 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. #pragma once
  2. #include <util/generic/ptr.h>
  3. #include <util/generic/string.h>
  4. #include <util/generic/vector.h>
  5. #include <util/generic/yexception.h>
  6. template <typename T>
  7. void ComputePrefixFunction(const T* begin, const T* end, ssize_t** result) {
  8. Y_ENSURE(begin != end, TStringBuf("empty pattern"));
  9. ssize_t len = end - begin;
  10. TArrayHolder<ssize_t> resultHolder(new ssize_t[len + 1]);
  11. ssize_t i = 0;
  12. ssize_t j = -1;
  13. resultHolder[0] = -1;
  14. while (i < len) {
  15. while ((j >= 0) && (begin[j] != begin[i]))
  16. j = resultHolder[j];
  17. ++i;
  18. ++j;
  19. Y_ASSERT(i >= 0);
  20. Y_ASSERT(j >= 0);
  21. Y_ASSERT(j < len);
  22. if ((i < len) && (begin[i] == begin[j]))
  23. resultHolder[i] = resultHolder[j];
  24. else
  25. resultHolder[i] = j;
  26. }
  27. *result = resultHolder.Release();
  28. }
  29. class TKMPMatcher {
  30. private:
  31. TArrayHolder<ssize_t> PrefixFunction;
  32. TString Pattern;
  33. void ComputePrefixFunction();
  34. public:
  35. TKMPMatcher(const char* patternBegin, const char* patternEnd);
  36. TKMPMatcher(const TString& pattern);
  37. bool SubStr(const char* begin, const char* end, const char*& result) const {
  38. Y_ASSERT(begin <= end);
  39. ssize_t m = Pattern.size();
  40. ssize_t n = end - begin;
  41. ssize_t i, j;
  42. for (i = 0, j = 0; (i < n) && (j < m); ++i, ++j) {
  43. while ((j >= 0) && (Pattern[j] != begin[i]))
  44. j = PrefixFunction[j];
  45. }
  46. if (j == m) {
  47. result = begin + i - m;
  48. return true;
  49. } else {
  50. return false;
  51. }
  52. }
  53. };
  54. template <typename T>
  55. class TKMPStreamMatcher {
  56. public:
  57. class ICallback {
  58. public:
  59. virtual void OnMatch(const T* begin, const T* end) = 0;
  60. virtual ~ICallback() = default;
  61. };
  62. private:
  63. ICallback* Callback;
  64. TArrayHolder<ssize_t> PrefixFunction;
  65. using TTVector = TVector<T>;
  66. TTVector Pattern;
  67. ssize_t State;
  68. TTVector Candidate;
  69. public:
  70. TKMPStreamMatcher(const T* patternBegin, const T* patternEnd, ICallback* callback)
  71. : Callback(callback)
  72. , Pattern(patternBegin, patternEnd)
  73. , State(0)
  74. , Candidate(Pattern.size())
  75. {
  76. ssize_t* pf;
  77. ComputePrefixFunction(patternBegin, patternEnd, &pf);
  78. PrefixFunction.Reset(pf);
  79. }
  80. void Push(const T& symbol) {
  81. while ((State >= 0) && (Pattern[State] != symbol)) {
  82. Y_ASSERT(State <= (ssize_t) Pattern.size());
  83. State = PrefixFunction[State];
  84. Y_ASSERT(State <= (ssize_t) Pattern.size());
  85. }
  86. if (State >= 0)
  87. Candidate[State] = symbol;
  88. ++State;
  89. if (State == (ssize_t) Pattern.size()) {
  90. Callback->OnMatch(Candidate.begin(), Candidate.end());
  91. State = 0;
  92. }
  93. }
  94. void Clear() {
  95. State = 0;
  96. }
  97. };