// comptable_ut.cpp (2.0 KB)
  1. #include <library/cpp/comptable/comptable.h>
  2. #include <library/cpp/testing/unittest/registar.h>
  3. #include <util/random/random.h>
  4. #include <util/random/fast.h>
  5. using namespace NCompTable;
  6. template <bool HQ>
  7. void DoTest(const TCompressorTable& table, const TVector<TString>& lines) {
  8. TVector<char> compressed;
  9. TVector<char> decompressed;
  10. TChunkCompressor compressor(HQ, table);
  11. TStringStream tmp;
  12. Save(&tmp, table);
  13. TCompressorTable tableLoaded;
  14. Load(&tmp, tableLoaded);
  15. UNIT_ASSERT(memcmp(&table, &tableLoaded, sizeof(table)) == 0);
  16. TChunkDecompressor deCompressor(HQ, tableLoaded);
  17. size_t origSize = 0;
  18. size_t compSize = 0;
  19. for (size_t i = 0; i < lines.size(); ++i) {
  20. const TString& line = lines[i];
  21. compressor.Compress(line, &compressed);
  22. origSize += line.size();
  23. compSize += compressed.size();
  24. TStringBuf in(compressed.data(), compressed.size());
  25. deCompressor.Decompress(in, &decompressed);
  26. UNIT_ASSERT(decompressed.size() == line.size() && memcmp(decompressed.data(), line.data(), decompressed.size()) == 0);
  27. }
  28. UNIT_ASSERT_EQUAL(origSize, 45491584);
  29. if (HQ) {
  30. UNIT_ASSERT_EQUAL(compSize, 11074583);
  31. } else {
  32. UNIT_ASSERT_EQUAL(compSize, 17459336);
  33. }
  34. UNIT_ASSERT(compSize < origSize);
  35. }
  36. Y_UNIT_TEST_SUITE(TestComptable) {
  37. Y_UNIT_TEST(TestComptableCompressDecompress) {
  38. TReallyFastRng32 rr(17);
  39. TVector<TString> lines;
  40. for (size_t i = 0; i < 1000000; ++i) {
  41. size_t size = rr.Uniform(32);
  42. TString res = "www.yandex.ru/yandsearch?text=";
  43. for (size_t j = 0; j < size; ++j) {
  44. res += "qwer"[rr.Uniform(4)];
  45. }
  46. lines.push_back(res);
  47. }
  48. THolder<TDataSampler> sampler(new TDataSampler);
  49. for (size_t i = 0; i < lines.size(); ++i) {
  50. sampler->AddStat(lines[i]);
  51. }
  52. TCompressorTable table;
  53. sampler->BuildTable(table);
  54. DoTest<true>(table, lines);
  55. DoTest<false>(table, lines);
  56. }
  57. }