12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394 |
- #pragma once
- #include "gd_entry.h"
- #include <util/generic/hash.h>
- #include <util/random/fast.h>
- namespace NGreedyDict {
- struct TBuildSettings {
- EEntryStatTest StatTest = EST_SIMPLE_NORM;
- EEntryScore Score = ES_LEN_SIMPLE;
- float MinPValue = 0.75;
- ui32 MinAbsCount = 10;
- ui32 GrowLimit = 10; // times of maxentries
- bool Verbose = false;
- };
- class TDictBuilder {
- using TCompoundCounts = THashMap<ui64, ui32, THash<ui64>, TEqualTo<ui64>, TPoolAllocator>;
- using TCandidate = std::pair<float, ui64>;
- using TCandidates = TVector<TCandidate>;
- private:
- TFastRng64 Rng{0x1a5d0ac170565c1c, 0x0be7bc27, 0x6235f6f57820aa0d, 0xafdc7fb};
- TStringBufs Input;
- THolder<TEntrySet> Current;
- TMemoryPool CompoundCountsPool;
- THolder<TCompoundCounts> CompoundCounts;
- TCandidates Candidates;
- TBuildSettings Settings;
- public:
- TDictBuilder(const TBuildSettings& s = TBuildSettings())
- : CompoundCountsPool(8112, TMemoryPool::TLinearGrow::Instance())
- , Settings(s)
- {
- }
- void SetInput(const TStringBufs& in) {
- Input = in;
- }
- const TBuildSettings& GetSettings() const {
- return Settings;
- }
- TBuildSettings& GetSettings() {
- return Settings;
- }
- void SetSettings(const TBuildSettings& s) {
- Settings = s;
- }
- TEntrySet& EntrySet() {
- return *Current;
- }
- const TEntrySet& EntrySet() const {
- return *Current;
- }
- THolder<TEntrySet> ReleaseEntrySet() {
- return std::move(Current);
- }
- ui32 /*iters*/ Build(ui32 maxentries, ui32 maxiters = 16, ui32 maxlen = -1, ui32 mindiff = 10);
- public:
- void RebuildCounts(ui32 maxcand, bool final);
- ui32 /*diff size*/ BuildNextGeneration(ui32 maxent, ui32 maxlen);
- static bool IsCompound(ui64 ent) {
- return ent & 0xFFFFFFFF00000000ULL;
- }
- static ui32 Next(ui64 ent) {
- return ent;
- }
- static ui32 Prev(ui64 ent) {
- return (ent >> 32) - 1;
- }
- static ui64 Compose(ui32 prev, ui32 next) {
- return ((prev + 1ULL) << 32) | next;
- }
- };
- }
|