// Unit tests for the comp_proto codec: pseudo-random data is serialized with Huffman metainfo
// and the decoder callbacks verify that exactly the same data is read back.
- #include "huff.h"
- #include "metainfo.h"
- #include "bit.h"
- #include <util/generic/vector.h>
- #include <util/generic/map.h>
- #include <util/system/protect.h>
- #include <library/cpp/testing/unittest/registar.h>
- static ui64 gSeed = 42;
- static void FlushPseudoRandom() {
- gSeed = 42;
- }
- static ui32 PseudoRandom(ui32 max) {
- // stupid and non-threadsafe, but very predictable chaos generator
- gSeed += 1;
- gSeed *= 419;
- gSeed = gSeed ^ (ui64(max) << 17);
- return gSeed % max;
- }
// Selects how TestWithParams obtains the Huffman metainfo used for encoding.
enum ECompMode {
    // TMetaInfo<THuff> is constructed directly from the textual description.
    CM_SINGLEPASS,
    // A TMetaInfo<THist> is constructed first, fed with the generated data,
    // and then converted to TMetaInfo<THuff>.
    CM_TWOPASS,
};
- struct TTestParams {
- size_t DataSize;
- ui32 ValueArraySize;
- };
- template <typename X>
- void TestSaveLoadMeta(NCompProto::TMetaInfo<X>& src) {
- TStringStream ss;
- src.Save(ss);
- TString data = ss.Str();
- NCompProto::TMetaInfo<X> loadedMeta(data);
- ss = TStringStream();
- loadedMeta.Save(ss);
- UNIT_ASSERT_EQUAL(ss.Str(), data);
- }
- template <typename TDecompressor, template <typename, typename> class TSerialize>
- void TestWithParams(const TString& metainfo, const ECompMode mode, const TTestParams& params) {
- using namespace NCompProto;
- FlushPseudoRandom();
- TStringInput stream(metainfo);
- THolder<TMetaInfo<THuff>> meta;
- if (mode == CM_TWOPASS) {
- TMetaInfo<THist> hist(stream);
- TEmpty empty;
- TSerialize<THist, TEmpty>::Serialize(hist, empty, params);
- meta.Reset(new TMetaInfo<THuff>(hist, THistToHuff::Instance()));
- } else {
- meta.Reset(new TMetaInfo<THuff>(stream));
- }
- TestSaveLoadMeta(*meta.Get());
- TBitBuffer buffer;
- TSerialize<THuff, TBitBuffer>::Serialize(*meta, buffer, params);
- ui64 codedSize = buffer.Position;
- TMetaInfo<TTable> decompressor(*meta, THuffToTable::Instance());
- // verify that no memory read beyond buffer occurs
- const size_t byteSize = buffer.ByteLength();
- const size_t PAGESIZEX = 4096;
- const size_t busyPages = (byteSize + (PAGESIZEX - 1)) / PAGESIZEX;
- const size_t allPages = busyPages + 1;
- const size_t allocSize = (allPages + 1) * PAGESIZEX;
- TVector<ui8> readBuffer(allocSize);
- ui8* start = &readBuffer[0];
- ui8* pageStart = reinterpret_cast<ui8*>((size_t(start) + PAGESIZEX) & ~(PAGESIZEX - 1));
- // XX DATA DATA DATA DATA PROT
- // | | | | | pages
- // calculate dataStart so that data ends exactly at the page end
- ui8* dataStart = pageStart + busyPages * PAGESIZEX - byteSize;
- ui8* dataEnd = pageStart + busyPages * PAGESIZEX;
- ProtectMemory(dataEnd, PAGESIZEX, PM_NONE);
- // memory copying should be performed without any problems
- memcpy(dataStart, buffer.Out.data(), byteSize);
- ui64 position = 0;
- TMetaIterator<TDecompressor> instance;
- // we should not read beyond dataEnd here
- instance.Decompress(&decompressor, dataStart, position);
- const ui64 decodedSize = position;
- UNIT_ASSERT_EQUAL(codedSize, decodedSize);
- // unprotect memory
- ProtectMemory(dataEnd, PAGESIZEX, PM_READ | PM_WRITE | PM_EXEC);
- }
- template <typename TDecompressor, template <typename, typename> class TSerialize>
- void Test(const TString& metainfo, const ECompMode mode) {
- for (size_t ds = 3; ds < 42; ds += (3 + PseudoRandom(5))) {
- for (size_t vas = 5; vas < 42; vas += (4 + PseudoRandom(10))) {
- TTestParams params;
- params.DataSize = ds;
- params.ValueArraySize = vas;
- TestWithParams<TDecompressor, TSerialize>(metainfo, mode, params);
- }
- }
- }
- Y_UNIT_TEST_SUITE(CompProtoTestBasic) {
- using namespace NCompProto;
- const TString metainfo =
- "\n\
- repeated data id 0\n\
- scalar clicks id 0 default const 0\n\
- scalar shows id 1 default const 0\n\
- repeated regClicks id 2\n\
- scalar clicks id 0 default const 0\n\
- scalar shows id 1 default const 0\n\
- end\n\
- scalar extra id 31 default const 0\n\
- end\n";
- struct TRegInfo {
- ui32 Clicks;
- ui32 Shows;
- };
- struct TData {
- ui32 Clicks;
- ui32 Shows;
- ui32 Extra;
- TMap<ui32, TRegInfo> RegClicks;
- };
- TVector<TData> data;
- template <class TMeta, class TFunctor>
- struct TSerialize {
- static void Serialize(TMetaInfo<TMeta>& meta, TFunctor& functor, const TTestParams& params) {
- FlushPseudoRandom();
- meta.BeginSelf(functor);
- data.clear();
- data.resize(params.DataSize);
- for (ui32 i = 0; i < params.DataSize; ++i) {
- meta.BeginElement(i, functor);
- data[i].Clicks = PseudoRandom(16) + 100;
- data[i].Shows = PseudoRandom(500) * PseudoRandom(16);
- data[i].Extra = PseudoRandom(500) + (1UL << 31); // test also saving of big values
- meta.SetScalar(0, data[i].Clicks, functor);
- meta.SetScalar(1, data[i].Shows, functor);
- TMetaInfo<TMeta>& regClicks = meta.BeginRepeated(2, functor);
- for (ui32 j = 0; j < PseudoRandom(200); j += 1 + PseudoRandom(10)) {
- regClicks.BeginElement(j, functor);
- TRegInfo& r = data[i].RegClicks[j];
- r.Clicks = PseudoRandom(2);
- r.Shows = PseudoRandom(800) * PseudoRandom(8) + 56;
- regClicks.SetScalar(0, r.Clicks, functor);
- regClicks.SetScalar(1, r.Shows, functor);
- regClicks.EndElement(functor);
- }
- regClicks.EndRepeated(functor);
- meta.SetScalar(31, data[i].Extra, functor);
- meta.EndElement(functor);
- }
- meta.EndRepeated(functor);
- }
- };
- struct TMultiDecompressor: public TParentHold<TMultiDecompressor> {
- struct TRegClicks: public TParentHold<TRegClicks> {
- const TData* Data;
- const TRegInfo* Elem;
- TRegClicks()
- : Data(nullptr)
- , Elem(nullptr)
- {
- }
- void BeginSelf(ui32 /*count*/, ui32 /*id*/) {
- }
- void EndSelf() {
- }
- void BeginElement(ui32 element) {
- TMap<ui32, TRegInfo>::const_iterator it = Data->RegClicks.find(element);
- if (it == Data->RegClicks.end()) {
- UNIT_ASSERT(0);
- }
- Elem = &it->second;
- }
- void EndElement() {
- }
- void SetScalar(size_t index, ui32 val) {
- if (index == 0)
- UNIT_ASSERT_EQUAL(val, Elem->Clicks);
- if (index == 1)
- UNIT_ASSERT_EQUAL(val, Elem->Shows);
- }
- IDecompressor& GetDecompressor(size_t) {
- UNIT_ASSERT(0);
- return GetEmptyDecompressor();
- }
- };
- const TData* Elem;
- TMetaIterator<TRegClicks> RegClicks;
- void BeginSelf(ui32 /*count*/, ui32 /*id*/) {
- }
- void EndSelf() {
- }
- void BeginElement(ui32 element) {
- UNIT_ASSERT(element < data.size());
- Elem = &data[element];
- }
- void EndElement() {
- }
- void SetScalar(size_t index, ui32 val) {
- if (index == 0)
- UNIT_ASSERT_EQUAL(val, Elem->Clicks);
- if (index == 1)
- UNIT_ASSERT_EQUAL(val, Elem->Shows);
- if (index == 31)
- UNIT_ASSERT_EQUAL(val, Elem->Extra);
- }
- IDecompressor& GetDecompressor(size_t index) {
- if (index == 2) {
- RegClicks.Self.Data = Elem;
- return RegClicks;
- }
- UNIT_ASSERT(0);
- return GetEmptyDecompressor();
- }
- TMultiDecompressor()
- : Elem(nullptr)
- {
- }
- };
- struct TVerifyingDecompressor: public TParentHold<TVerifyingDecompressor> {
- enum EState {
- Startstop,
- OutDataElem,
- InDataElem,
- InRegClicks,
- };
- EState State;
- ui32 DataInd;
- TMap<ui32, TRegInfo>::iterator RegIter;
- TMetaIterator<TVerifyingDecompressor>& GetDecompressor(size_t index) {
- Y_UNUSED(index);
- return *Parent;
- }
- TVerifyingDecompressor()
- : State(Startstop)
- , DataInd(0)
- {
- }
- void BeginSelf(ui32 /*count*/, ui32 id) {
- switch (State) {
- case Startstop:
- UNIT_ASSERT_EQUAL(id, 0);
- State = OutDataElem;
- break;
- case OutDataElem:
- UNIT_ASSERT(0);
- case InDataElem:
- UNIT_ASSERT_EQUAL(id, 2);
- State = InRegClicks;
- RegIter = data[DataInd].RegClicks.begin();
- break;
- case InRegClicks:
- UNIT_ASSERT(0);
- default:
- UNIT_ASSERT(0);
- }
- }
- void EndSelf() {
- switch (State) {
- case Startstop:
- UNIT_ASSERT(0);
- case OutDataElem:
- State = Startstop;
- break;
- case InDataElem:
- UNIT_ASSERT(0);
- case InRegClicks:
- UNIT_ASSERT_EQUAL(RegIter, data[DataInd].RegClicks.end());
- State = InDataElem;
- break;
- default:
- UNIT_ASSERT(0);
- }
- }
- void BeginElement(ui32 element) {
- switch (State) {
- case Startstop:
- UNIT_ASSERT(0);
- case OutDataElem:
- UNIT_ASSERT(element < data.size());
- State = InDataElem;
- break;
- case InDataElem:
- UNIT_ASSERT(0);
- case InRegClicks:
- UNIT_ASSERT_EQUAL(element, RegIter->first);
- break;
- }
- }
- void EndElement() {
- switch (State) {
- case Startstop:
- UNIT_ASSERT(0);
- case OutDataElem:
- UNIT_ASSERT(0);
- case InDataElem:
- State = OutDataElem;
- ++DataInd;
- break;
- case InRegClicks:
- ++RegIter;
- break;
- }
- }
- void SetScalar(size_t index, ui32 val) {
- switch (State) {
- case OutDataElem:
- UNIT_ASSERT(0);
- case InDataElem:
- if (index == 0)
- UNIT_ASSERT_EQUAL(val, data[DataInd].Clicks);
- if (index == 1)
- UNIT_ASSERT_EQUAL(val, data[DataInd].Shows);
- if (index == 31)
- UNIT_ASSERT_EQUAL(val, data[DataInd].Extra);
- break;
- case InRegClicks:
- if (index == 0)
- UNIT_ASSERT_EQUAL(val, RegIter->second.Clicks);
- if (index == 1)
- UNIT_ASSERT_EQUAL(val, RegIter->second.Shows);
- break;
- default:
- UNIT_ASSERT(0);
- }
- }
- };
- Y_UNIT_TEST(VerifyDecompression) {
- Test<TVerifyingDecompressor, TSerialize>(metainfo, CM_SINGLEPASS);
- }
- Y_UNIT_TEST(VerifyHistDecompression) {
- Test<TVerifyingDecompressor, TSerialize>(metainfo, CM_TWOPASS);
- }
- Y_UNIT_TEST(VerifyDecompressionMulti) {
- Test<TMultiDecompressor, TSerialize>(metainfo, CM_SINGLEPASS);
- }
- Y_UNIT_TEST(VerifyHistDecompressionMulti) {
- Test<TMultiDecompressor, TSerialize>(metainfo, CM_TWOPASS);
- }
- }
- Y_UNIT_TEST_SUITE(CompProtoTestExtended) {
- using namespace NCompProto;
- const TString metainfo =
- "\n\
- repeated data id 0\n\
- repeated second id 3\n\
- scalar inner2 id 0 default const 0\n\
- end\n\
- repeated first id 2\n\
- scalar inner id 0 default const 0\n\
- end\n\
- end\n";
- TVector<std::pair<TVector<ui32>, TVector<ui32>>> data;
- template <class TMeta, class TFunctor>
- struct TSerialize {
- static void Serialize(TMetaInfo<TMeta>& meta, TFunctor& functor, const TTestParams& params) {
- FlushPseudoRandom();
- meta.BeginSelf(functor);
- data.clear();
- data.resize(params.DataSize);
- for (size_t i = 0; i < params.DataSize; ++i) {
- meta.BeginElement(i, functor);
- TMetaInfo<TMeta>& first = meta.BeginRepeated(2, functor);
- data[i].first.resize(params.ValueArraySize);
- for (ui32 j = 0; j < params.ValueArraySize; j++) {
- first.BeginElement(j, functor);
- ui32 val = PseudoRandom(42 * 42 * 42);
- first.SetScalar(0, val, functor);
- data[i].first[j] = val;
- first.EndElement(functor);
- }
- first.EndRepeated(functor);
- TMetaInfo<TMeta>& second = meta.BeginRepeated(3, functor);
- data[i].second.resize(params.ValueArraySize);
- for (ui32 j = 0; j < params.ValueArraySize; j++) {
- second.BeginElement(j, functor);
- ui32 val = PseudoRandom(42 * 42 * 42);
- second.SetScalar(0, val, functor);
- data[i].second[j] = val;
- second.EndElement(functor);
- }
- second.EndRepeated(functor);
- meta.EndElement(functor);
- }
- meta.EndRepeated(functor);
- }
- };
- struct TVerifyingDecompressor: public TParentHold<TVerifyingDecompressor> {
- enum EState {
- Startstop,
- OutDataElem,
- InDataElemBeforeSecond,
- InDataElemSecond,
- InFirst,
- InSecond,
- };
- EState State;
- ui32 DataInd;
- ui32 ArrayInd;
- TVerifyingDecompressor()
- : State(Startstop)
- , DataInd(0)
- , ArrayInd(0)
- {
- }
- TMetaIterator<TVerifyingDecompressor>& GetDecompressor(size_t index) {
- Y_UNUSED(index);
- return *Parent;
- }
- void BeginSelf(ui32 /*count*/, ui32 id) {
- switch (State) {
- case Startstop:
- UNIT_ASSERT_EQUAL(id, 0);
- State = OutDataElem;
- break;
- case InDataElemBeforeSecond:
- UNIT_ASSERT_EQUAL(id, 2);
- State = InFirst;
- ArrayInd = 0;
- break;
- case InDataElemSecond:
- UNIT_ASSERT_EQUAL(id, 3);
- State = InSecond;
- ArrayInd = 0;
- break;
- default:
- UNIT_ASSERT(0);
- }
- }
- void EndSelf() {
- switch (State) {
- case OutDataElem:
- State = Startstop;
- break;
- case InFirst:
- State = InDataElemSecond;
- break;
- case InSecond:
- State = InDataElemSecond;
- break;
- default:
- UNIT_ASSERT(0);
- }
- }
- void BeginElement(ui32 element) {
- switch (State) {
- case OutDataElem:
- UNIT_ASSERT(element < data.size());
- State = InDataElemBeforeSecond;
- break;
- case InFirst:
- UNIT_ASSERT(element < data[DataInd].first.size());
- break;
- case InSecond:
- UNIT_ASSERT(element < data[DataInd].second.size());
- break;
- default:
- Cerr << (ui32)State << Endl;
- UNIT_ASSERT(0);
- }
- }
- void EndElement() {
- switch (State) {
- case InFirst:
- case InSecond:
- ++ArrayInd;
- break;
- case InDataElemSecond:
- ++DataInd;
- State = OutDataElem;
- break;
- default:
- Cerr << (ui32)State << Endl;
- UNIT_ASSERT(0);
- }
- }
- void SetScalar(size_t index, ui32 val) {
- UNIT_ASSERT_EQUAL(index, 0);
- switch (State) {
- case InFirst:
- UNIT_ASSERT_EQUAL(val, data[DataInd].first[ArrayInd]);
- break;
- case InSecond:
- UNIT_ASSERT_EQUAL(val, data[DataInd].second[ArrayInd]);
- break;
- default:
- UNIT_ASSERT(0);
- }
- }
- };
- Y_UNIT_TEST(VerifyDecompression) {
- Test<TVerifyingDecompressor, TSerialize>(metainfo, CM_SINGLEPASS);
- }
- Y_UNIT_TEST(VerifyHistDecompression) {
- Test<TVerifyingDecompressor, TSerialize>(metainfo, CM_TWOPASS);
- }
- }
|