#include "levenshtein_diff.h" #include #include namespace { float unaryZeroWeigher(const char&) { return 0.0f; } float unaryMaxWeigher(const char&) { return 1.0f; } float binaryZeroWeigher(const char&, const char&) { return 0.0f; } float binaryMaxWeigher(const char&, const char&) { return 1.0f; } } Y_UNIT_TEST_SUITE(Levenstein) { Y_UNIT_TEST(Distance) { UNIT_ASSERT_VALUES_EQUAL(NLevenshtein::Distance(TStringBuf("knight"), TStringBuf("king")), 4); UNIT_ASSERT_VALUES_EQUAL(NLevenshtein::Distance(TStringBuf("life flies"), TStringBuf("fly lives")), 6); UNIT_ASSERT_VALUES_EQUAL(NLevenshtein::Distance(TStringBuf("spot trail"), TStringBuf("stop trial")), 4); UNIT_ASSERT_VALUES_EQUAL(NLevenshtein::Distance(TStringBuf("hello"), TStringBuf("hulloah")), 3); UNIT_ASSERT_VALUES_EQUAL(NLevenshtein::Distance(TStringBuf("yeoman"), TStringBuf("yo man")), 2); } Y_UNIT_TEST(DamerauDistance) { UNIT_ASSERT_VALUES_EQUAL(NLevenshtein::DamerauDistance(TStringBuf("knight"), TStringBuf("king")), 3); UNIT_ASSERT_VALUES_EQUAL(NLevenshtein::DamerauDistance(TStringBuf("life flies"), TStringBuf("fly lives")), 6); UNIT_ASSERT_VALUES_EQUAL(NLevenshtein::DamerauDistance(TStringBuf("spot trail"), TStringBuf("stop trial")), 3); UNIT_ASSERT_VALUES_EQUAL(NLevenshtein::DamerauDistance(TStringBuf("status"), TStringBuf("tsatus")), 1); UNIT_ASSERT_VALUES_EQUAL(NLevenshtein::DamerauDistance(TStringBuf("hello"), TStringBuf("heh lol")), 3); } } Y_UNIT_TEST_SUITE(WeightedLevenstein) { Y_UNIT_TEST(EqualStrings) { NLevenshtein::TEditChain chain; float distance = 0.0f; NLevenshtein::GetEditChain(TString("12345"), TString("12345"), chain, &distance, binaryMaxWeigher, unaryMaxWeigher, unaryMaxWeigher); UNIT_ASSERT_VALUES_EQUAL(distance, 0.0f); UNIT_ASSERT_VALUES_EQUAL(chain.size(), 5); } Y_UNIT_TEST(EmptyStrings) { NLevenshtein::TEditChain chain; float distance = 0.0f; NLevenshtein::GetEditChain(TString(""), TString(""), chain, &distance, binaryMaxWeigher, unaryMaxWeigher, unaryMaxWeigher); UNIT_ASSERT_VALUES_EQUAL(distance, 0.0f); UNIT_ASSERT_VALUES_EQUAL(chain.size(), 0); } Y_UNIT_TEST(InsertsOnly) { auto unaryWeigher = [](const char&) { return 2.0f; }; NLevenshtein::TEditChain chain; float distance = 0.0f; NLevenshtein::GetEditChain(TString(""), TString("12345"), chain, &distance, binaryZeroWeigher, unaryZeroWeigher, unaryWeigher); UNIT_ASSERT_VALUES_EQUAL(distance, 10.0f); UNIT_ASSERT_VALUES_EQUAL(chain.size(), 5); } Y_UNIT_TEST(DeletionsOnly) { auto unaryWeigher = [](const char&) { return 3.0f; }; NLevenshtein::TEditChain chain; float distance = 0.0f; NLevenshtein::GetEditChain(TString("54321"), TString(""), chain, &distance, binaryZeroWeigher, unaryWeigher, unaryZeroWeigher); UNIT_ASSERT_VALUES_EQUAL(distance, 15.0f); UNIT_ASSERT_VALUES_EQUAL(chain.size(), 5); } Y_UNIT_TEST(SymmetryCheck) { const TString str1 = "123x5"; const TString str2 = "x2345"; const float trgDistance = 2.0f; const size_t trgChainLen = 5; NLevenshtein::TEditChain chainLeftRight; float distanceLeftRight = 0.0f; NLevenshtein::GetEditChain(str1, str2, chainLeftRight, &distanceLeftRight, binaryMaxWeigher, unaryMaxWeigher, unaryMaxWeigher); UNIT_ASSERT_VALUES_EQUAL(distanceLeftRight, trgDistance); UNIT_ASSERT_VALUES_EQUAL(chainLeftRight.size(), trgChainLen); UNIT_ASSERT_VALUES_EQUAL(static_cast(chainLeftRight[0]), static_cast(NLevenshtein::EMT_REPLACE)); UNIT_ASSERT_VALUES_EQUAL(static_cast(chainLeftRight[1]), static_cast(NLevenshtein::EMT_PRESERVE)); UNIT_ASSERT_VALUES_EQUAL(static_cast(chainLeftRight[2]), static_cast(NLevenshtein::EMT_PRESERVE)); UNIT_ASSERT_VALUES_EQUAL(static_cast(chainLeftRight[3]), static_cast(NLevenshtein::EMT_REPLACE)); UNIT_ASSERT_VALUES_EQUAL(static_cast(chainLeftRight[4]), static_cast(NLevenshtein::EMT_PRESERVE)); NLevenshtein::TEditChain chainRightLeft; float distanceRightLeft = 0.0f; NLevenshtein::GetEditChain(str2, str1, chainRightLeft, &distanceRightLeft, binaryMaxWeigher, unaryMaxWeigher, unaryMaxWeigher); UNIT_ASSERT_VALUES_EQUAL(distanceRightLeft, trgDistance); UNIT_ASSERT_VALUES_EQUAL(chainRightLeft.size(), trgChainLen); UNIT_ASSERT(chainRightLeft == chainLeftRight); } Y_UNIT_TEST(PreferReplacements) { auto binaryWeigher = [](const char&, const char&) { return 0.0625f; }; NLevenshtein::TEditChain chain; float distance = 0.0f; NLevenshtein::GetEditChain(TString("54321"), TString("43210"), chain, &distance, binaryWeigher, unaryMaxWeigher, unaryMaxWeigher); UNIT_ASSERT_VALUES_EQUAL(distance, 0.3125f); UNIT_ASSERT_VALUES_EQUAL(chain.size(), 5); } Y_UNIT_TEST(PreferInsertDeletions) { auto unaryWeigher = [](const char&) { return 0.0625f; }; NLevenshtein::TEditChain chain; float distance = 0.0f; NLevenshtein::GetEditChain(TString("54321"), TString("98765"), chain, &distance, binaryMaxWeigher, unaryWeigher, unaryWeigher); UNIT_ASSERT_VALUES_EQUAL(distance, 0.5f); UNIT_ASSERT_VALUES_EQUAL(chain.size(), 9); } Y_UNIT_TEST(NoXDeletions) { auto unaryWeigher = [](const char& c) { return c == 'x' ? 100.0f : 1.0f; }; NLevenshtein::TEditChain chain; float distance = 0.0f; NLevenshtein::GetEditChain(TString("543x1"), TString("5431"), chain, &distance, binaryMaxWeigher, unaryWeigher, unaryMaxWeigher); UNIT_ASSERT_VALUES_EQUAL(chain.size(), 5); UNIT_ASSERT_VALUES_EQUAL(static_cast(chain[3]), static_cast(NLevenshtein::EMT_REPLACE)); UNIT_ASSERT_VALUES_EQUAL(static_cast(chain[4]), static_cast(NLevenshtein::EMT_DELETE)); UNIT_ASSERT_VALUES_EQUAL(distance, 2.0f); } Y_UNIT_TEST(NoXInsertions) { auto unaryWeigher = [](const char& c) { return c == 'x' ? 100.0f : 1.0f; }; NLevenshtein::TEditChain chain; float distance = 0.0f; NLevenshtein::GetEditChain(TString("5431"), TString("543x1"), chain, &distance, binaryMaxWeigher, unaryMaxWeigher, unaryWeigher); UNIT_ASSERT_VALUES_EQUAL(chain.size(), 5); UNIT_ASSERT_VALUES_EQUAL(static_cast(chain[3]), static_cast(NLevenshtein::EMT_REPLACE)); UNIT_ASSERT_VALUES_EQUAL(static_cast(chain[4]), static_cast(NLevenshtein::EMT_INSERT)); UNIT_ASSERT_VALUES_EQUAL(distance, 2.0f); } Y_UNIT_TEST(NoReplacementsOfX) { auto binaryWeigher = [](const char& l, const char&) { return l == 'x' ? 100.0f : 1.0f; }; NLevenshtein::TEditChain chain; float distance = 0.0f; NLevenshtein::GetEditChain(TString("5432x"), TString("5432y"), chain, &distance, binaryWeigher, unaryMaxWeigher, unaryMaxWeigher); UNIT_ASSERT_VALUES_EQUAL(chain.size(), 6); UNIT_ASSERT_VALUES_EQUAL(static_cast(chain[4]), static_cast(NLevenshtein::EMT_DELETE)); UNIT_ASSERT_VALUES_EQUAL(static_cast(chain[5]), static_cast(NLevenshtein::EMT_INSERT)); UNIT_ASSERT_VALUES_EQUAL(distance, 2.0f); } Y_UNIT_TEST(NoReplacementsForX) { auto binaryWeigher = [](const char&, const char& r) { return r == 'x' ? 100.0f : 1.0f; }; NLevenshtein::TEditChain chain; float distance = 0.0f; NLevenshtein::GetEditChain(TString("y4321"), TString("x4321"), chain, &distance, binaryWeigher, unaryMaxWeigher, unaryMaxWeigher); UNIT_ASSERT_VALUES_EQUAL(chain.size(), 6); UNIT_ASSERT_VALUES_EQUAL(static_cast(chain[0]), static_cast(NLevenshtein::EMT_DELETE)); UNIT_ASSERT_VALUES_EQUAL(static_cast(chain[1]), static_cast(NLevenshtein::EMT_INSERT)); UNIT_ASSERT_VALUES_EQUAL(distance, 2.0f); } Y_UNIT_TEST(SimilarOperationPriorities) { auto replaceWeigher = [](const char&, const char&) { return 0.5f; }; auto deleteWeigher = [](const char&) { return 0.2f; }; auto insertWeigher = [](const char&) { return 0.9f; }; NLevenshtein::TEditChain chain; float distance = 0.0f; NLevenshtein::GetEditChain(TString("y0"), TString("0x"), chain, &distance, replaceWeigher, deleteWeigher, insertWeigher); UNIT_ASSERT_VALUES_EQUAL(chain.size(), 2); UNIT_ASSERT_VALUES_EQUAL(static_cast(chain[0]), static_cast(NLevenshtein::EMT_REPLACE)); UNIT_ASSERT_VALUES_EQUAL(static_cast(chain[1]), static_cast(NLevenshtein::EMT_REPLACE)); UNIT_ASSERT_VALUES_EQUAL(distance, 1.0f); } Y_UNIT_TEST(DamerauLevenshtein) { int distance; NLevenshtein::TEditChain chain; NLevenshtein::GetEditChainGeneric(TString("status"), TString("tsatus"), chain, &distance); UNIT_ASSERT_VALUES_EQUAL(distance, 1); UNIT_ASSERT_VALUES_EQUAL(chain.size(), 5); UNIT_ASSERT_VALUES_EQUAL(static_cast(chain[0]), static_cast(NLevenshtein::EMT_TRANSPOSE)); UNIT_ASSERT_VALUES_EQUAL(static_cast(chain[1]), static_cast(NLevenshtein::EMT_PRESERVE)); NLevenshtein::GetEditChainGeneric(TString("hello"), TString("heh lol"), chain, &distance); UNIT_ASSERT_VALUES_EQUAL(distance, 3); UNIT_ASSERT_VALUES_EQUAL(chain.size(), 6); UNIT_ASSERT_VALUES_EQUAL(static_cast(chain[0]), static_cast(NLevenshtein::EMT_PRESERVE)); UNIT_ASSERT_VALUES_EQUAL(static_cast(chain[1]), static_cast(NLevenshtein::EMT_PRESERVE)); UNIT_ASSERT_VALUES_EQUAL(static_cast(chain[2]), static_cast(NLevenshtein::EMT_INSERT)); UNIT_ASSERT_VALUES_EQUAL(static_cast(chain[3]), static_cast(NLevenshtein::EMT_INSERT)); UNIT_ASSERT_VALUES_EQUAL(static_cast(chain[4]), static_cast(NLevenshtein::EMT_PRESERVE)); UNIT_ASSERT_VALUES_EQUAL(static_cast(chain[5]), static_cast(NLevenshtein::EMT_TRANSPOSE)); } }