123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293 |
- #include "absl/strings/internal/damerau_levenshtein_distance.h"
- #include <algorithm>
- #include <array>
- #include <numeric>
- #include "absl/strings/string_view.h"
- namespace absl {
- ABSL_NAMESPACE_BEGIN
- namespace strings_internal {
- uint8_t CappedDamerauLevenshteinDistance(absl::string_view s1,
- absl::string_view s2, uint8_t cutoff) {
- const uint8_t MAX_SIZE = 100;
- const uint8_t _cutoff = std::min(MAX_SIZE, cutoff);
- const uint8_t cutoff_plus_1 = static_cast<uint8_t>(_cutoff + 1);
- if (s1.size() > s2.size()) std::swap(s1, s2);
- if (s1.size() + _cutoff < s2.size() || s2.size() > MAX_SIZE)
- return cutoff_plus_1;
- if (s1.empty())
- return static_cast<uint8_t>(s2.size());
-
- const uint8_t lower_diag =
- _cutoff - static_cast<uint8_t>(s2.size() - s1.size());
-
- const uint8_t upper_diag = _cutoff;
-
- std::array<std::array<uint8_t, MAX_SIZE + 2>, MAX_SIZE + 2> d;
- std::iota(d[0].begin(), d[0].begin() + upper_diag + 1, 0);
- d[0][cutoff_plus_1] = cutoff_plus_1;
- for (size_t i = 1; i <= s1.size(); ++i) {
-
- size_t j_begin = 1;
- if (i > lower_diag) {
- j_begin = i - lower_diag;
- d[i][j_begin - 1] = cutoff_plus_1;
- } else {
- d[i][0] = static_cast<uint8_t>(i);
- }
-
- size_t j_end = i + upper_diag;
- if (j_end > s2.size()) {
- j_end = s2.size();
- } else {
- d[i][j_end + 1] = cutoff_plus_1;
- }
- for (size_t j = j_begin; j <= j_end; ++j) {
- const uint8_t deletion_distance = d[i - 1][j] + 1;
- const uint8_t insertion_distance = d[i][j - 1] + 1;
- const uint8_t mismatched_tail_cost = s1[i - 1] == s2[j - 1] ? 0 : 1;
- const uint8_t mismatch_distance = d[i - 1][j - 1] + mismatched_tail_cost;
- uint8_t transposition_distance = _cutoff + 1;
- if (i > 1 && j > 1 && s1[i - 1] == s2[j - 2] && s1[i - 2] == s2[j - 1])
- transposition_distance = d[i - 2][j - 2] + 1;
- d[i][j] = std::min({cutoff_plus_1, deletion_distance, insertion_distance,
- mismatch_distance, transposition_distance});
- }
- }
- return d[s1.size()][s2.size()];
- }
- }
- ABSL_NAMESPACE_END
- }
|