123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553 |
- #pragma once
- #include <util/generic/bitops.h>
- #include <util/generic/maybe.h>
- #include <util/generic/strbuf.h>
- #include <util/generic/string.h>
- #include <util/stream/output.h>
- #include <cfloat>
- #include <cmath>
- // TODO: remove when switched to c++11 stl
- #if _LIBCPP_STD_VER >= 11
- #include <limits>
- #else
- #define PRESORT_FP_DISABLED
- #endif
- /*
- Serialization PREServing ORder of Tuples of numbers, strings and optional numbers or strings
- Lexicographic ordering of serialized tuples will be the same as of non-serialized
- Descending order is supported
- */
- namespace NPresort {
- namespace NImpl {
- enum ECode {
- StringEnd = 0x00,
- StringPart = 0x10,
- IntNeg = 0x20,
- IntNonNeg = 0x30,
- Unsigned = 0x40,
- Float = 0x50,
- Double = 0x60,
- Extension = 0x70,
- Descending = 0x80,
- };
- static const ui8 CodeMask = 0xf0;
- static const ui8 LengthMask = 0x0f;
- static const ui8 Optional = 0x01;
- static const ui8 OptionalFilled = 0x02;
- enum EFPCode {
- NegInf = 0x00,
- NegFar = 0x01,
- NegNear = 0x02,
- NegSub = 0x03,
- Zero = 0x04,
- PosSub = 0x05,
- PosNear = 0x06,
- PosFar = 0x07,
- PosInf = 0x08,
- Nan = 0x09,
- Disabled = 0x0f
- };
- static const ui8 FPCodeMask = 0x0f;
- static const size_t BlockLength = 15;
- static const size_t BufferLength = BlockLength + 1;
- static const float FloatSignificandBase = pow((float)FLT_RADIX, FLT_MANT_DIG);
- static const double DoubleSignificandBase = pow((double)FLT_RADIX, DBL_MANT_DIG);
- template <typename T>
- struct TSignificandBase {
- static double Value() {
- return DoubleSignificandBase;
- }
- };
- template <>
- struct TSignificandBase<float> {
- static float Value() {
- return FloatSignificandBase;
- }
- };
- struct TDecodeContext {
- ECode Code;
- TString Err;
- TString Str;
- ui32 StrBlocks = 0;
- i64 SignedVal = 0;
- ui64 UnsignedVal = 0;
- float FloatVal = 0;
- double DoubleVal = 0;
- bool Optional = false;
- bool Filled = false;
- };
- class TBlock {
- public:
- TBlock()
- : Len(0)
- {
- memset(Buf, 0, BufferLength);
- }
- void Put(IOutputStream& out) const {
- out.Write(Buf, Len);
- }
- ui8 GetLen() const {
- return Len;
- }
- void EncodeSignedInt(i64 val, bool desc) {
- const bool neg = val < 0;
- const ui8 bytes = val ? EncodeInt(neg ? -val : val) : 0;
- Set(neg ? ((~IntNeg) & CodeMask) : IntNonNeg, bytes, neg != desc);
- }
- void EncodeUnsignedInt(ui64 val, bool desc, bool end = false) {
- const ui8 bytes = val ? EncodeInt(val) : 0;
- Set(end ? StringEnd : Unsigned, bytes, desc);
- }
- bool EncodeFloating(float val, bool desc) {
- const EFPCode code = FPCode(val);
- Set(Float | code, 0, desc);
- return FPNeedEncodeValue(code);
- }
- bool EncodeFloating(double val, bool desc) {
- const EFPCode code = FPCode(val);
- Set(Double | code, 0, desc);
- return FPNeedEncodeValue(code);
- }
- void EncodeString(TStringBuf str, bool desc) {
- memcpy(Buf + 1, str.data(), str.size());
- Set(StringPart, BlockLength, desc);
- }
- void EncodeOptional(bool filled, bool desc) {
- Set(Extension | Optional | (filled ? OptionalFilled : 0), 0, desc);
- }
- bool Decode(TDecodeContext& ctx, TStringBuf str) {
- if (str.empty()) {
- ctx.Err = "No data";
- return false;
- }
- Len = 1;
- bool desc = false;
- ui8 byte = str[0];
- ui8 code = byte & CodeMask;
- if (code >= Descending) {
- desc = true;
- byte = ~byte;
- code = byte & CodeMask;
- }
- switch (code) {
- case StringPart:
- if (!Init(ctx, str, byte, desc)) {
- return false;
- }
- ctx.Str.append((const char*)Buf + 1, Len - 1);
- ++ctx.StrBlocks;
- break;
- case StringEnd: {
- if (!Init(ctx, str, byte, desc)) {
- return false;
- }
- const ui64 val = DecodeInt();
- if (val) {
- if (!ctx.StrBlocks) {
- ctx.Err = "Unexpected end of string";
- return false;
- }
- if (val > BlockLength) {
- ctx.Err = "Invalid string terminal";
- return false;
- }
- ctx.Str.erase(BlockLength * (ctx.StrBlocks - 1) + val);
- }
- ctx.StrBlocks = 0;
- break;
- }
- case IntNeg:
- if (!Init(ctx, str, ~byte, !desc)) {
- return false;
- }
- ctx.SignedVal = -(i64)DecodeInt();
- break;
- case IntNonNeg:
- if (!Init(ctx, str, byte, desc)) {
- return false;
- }
- ctx.SignedVal = DecodeInt();
- break;
- case Unsigned:
- if (!Init(ctx, str, byte, desc)) {
- return false;
- }
- ctx.UnsignedVal = DecodeInt();
- break;
- case Float:
- if (!DecodeFloating((EFPCode)(byte & FPCodeMask), ctx.FloatVal, ctx, str.Skip(Len))) {
- return false;
- }
- break;
- case Double:
- if (!DecodeFloating((EFPCode)(byte & FPCodeMask), ctx.DoubleVal, ctx, str.Skip(Len))) {
- return false;
- }
- break;
- case Extension:
- ctx.Optional = byte & Optional;
- ctx.Filled = byte & OptionalFilled;
- break;
- default:
- Y_ABORT("Invalid record code: %d", (int)code);
- }
- ctx.Code = (ECode)code;
- return true;
- }
- private:
- bool Init(TDecodeContext& ctx, TStringBuf str, ui8 byte, bool invert) {
- Len = (byte & LengthMask) + 1;
- if (Len > BufferLength) {
- ctx.Err = "Invalid block length";
- return false;
- }
- if (Len > str.size()) {
- ctx.Err = "Unexpected end of data";
- return false;
- }
- memcpy(Buf, str.data(), Len);
- if (invert) {
- Invert();
- }
- return true;
- }
- ui64 DecodeInt() const {
- ui64 val = 0;
- for (ui8 b = 1; b < Len; ++b) {
- const ui8 shift = Len - b - 1;
- if (shift < sizeof(ui64)) {
- val |= ((ui64)Buf[b]) << (8 * shift);
- }
- }
- return val;
- }
- ui8 EncodeInt(ui64 val) {
- const ui8 bytes = GetValueBitCount(val) / 8 + 1;
- for (ui8 b = 1; b <= bytes; ++b) {
- const ui8 shift = bytes - b;
- if (shift < sizeof(ui64)) {
- Buf[b] = val >> (8 * shift);
- }
- }
- return bytes;
- }
- static bool FPNeedEncodeValue(EFPCode code) {
- return code != Nan && code != Zero && code != NegInf && code != PosInf && code != Disabled;
- }
- template <typename T>
- static EFPCode FPCode(T val) {
- #ifdef PRESORT_FP_DISABLED
- Y_UNUSED(val);
- return Disabled;
- #else
- switch (std::fpclassify(val)) {
- case FP_INFINITE:
- return val < 0 ? NegInf : PosInf;
- case FP_NAN:
- return Nan;
- case FP_ZERO:
- return Zero;
- case FP_SUBNORMAL:
- return val < 0 ? NegSub : PosSub;
- case FP_NORMAL:
- break;
- }
- if (val < 0) {
- return val > -1 ? NegNear : NegFar;
- }
- return val < 1 ? PosNear : PosFar;
- #endif
- }
- template <typename T>
- bool DecodeFloating(EFPCode code, T& val, TDecodeContext& ctx, TStringBuf data) {
- #ifdef PRESORT_FP_DISABLED
- Y_UNUSED(code);
- Y_UNUSED(val);
- Y_UNUSED(data);
- ctx.Err = "Floating point numbers support is disabled";
- return false;
- #else
- switch (code) {
- case Zero:
- val = 0;
- return true;
- case NegInf:
- val = -std::numeric_limits<T>::infinity();
- return true;
- case PosInf:
- val = std::numeric_limits<T>::infinity();
- return true;
- case Nan:
- val = std::numeric_limits<T>::quiet_NaN();
- return true;
- case Disabled:
- ctx.Err = "Floating point numbers support was disabled on encoding";
- return false;
- default:
- break;
- }
- i64 exp = 0;
- i64 sig = 0;
- if (!DecodeFloatingPart(exp, ctx, data) || !DecodeFloatingPart(sig, ctx, data)) {
- return false;
- }
- val = ldexp(sig / TSignificandBase<T>::Value(), exp);
- return true;
- #endif
- }
- bool DecodeFloatingPart(i64& val, TDecodeContext& ctx, TStringBuf& data) {
- TBlock block;
- if (!block.Decode(ctx, data)) {
- return false;
- }
- if (ctx.Code != IntNeg && ctx.Code != IntNonNeg) {
- ctx.Err = "Invalid floating part";
- return false;
- }
- val = ctx.SignedVal;
- ctx.SignedVal = 0;
- data.Skip(block.GetLen());
- Len += block.GetLen();
- return true;
- }
- void Set(ui8 code, ui8 size, bool invert) {
- Y_ASSERT(size <= BlockLength);
- Buf[0] = code | size;
- Len = size + 1;
- if (invert) {
- Invert();
- }
- }
- void Invert() {
- Y_ASSERT(Len <= BufferLength);
- for (ui8 b = 0; b < Len; ++b) {
- Buf[b] = ~Buf[b];
- }
- }
- private:
- ui8 Buf[BufferLength];
- ui8 Len;
- };
- }
- inline void EncodeSignedInt(IOutputStream& out, i64 val, bool desc = false) {
- NImpl::TBlock block;
- block.EncodeSignedInt(val, desc);
- block.Put(out);
- }
- inline void EncodeUnsignedInt(IOutputStream& out, ui64 val, bool desc = false) {
- NImpl::TBlock block;
- block.EncodeUnsignedInt(val, desc);
- block.Put(out);
- }
- template <typename T>
- inline void EncodeFloating(IOutputStream& out, T val, bool desc = false) {
- NImpl::TBlock head;
- const bool encodeValue = head.EncodeFloating(val, desc);
- head.Put(out);
- if (encodeValue) {
- int exponent = 0;
- i64 significand = 0;
- significand = frexp(val, &exponent) * NImpl::TSignificandBase<T>::Value();
- NImpl::TBlock exp;
- exp.EncodeSignedInt(exponent, desc);
- exp.Put(out);
- NImpl::TBlock sig;
- sig.EncodeSignedInt(significand, desc);
- sig.Put(out);
- }
- }
- inline void EncodeString(IOutputStream& out, TStringBuf str, bool desc = false) {
- size_t part = 0;
- while (!str.empty()) {
- part = Min(str.size(), NImpl::BlockLength);
- NImpl::TBlock block;
- block.EncodeString(str.Head(part), desc);
- block.Put(out);
- str.Skip(part);
- }
- // Encode string end token
- NImpl::TBlock end;
- end.EncodeUnsignedInt(part, desc, true);
- end.Put(out);
- }
- template <bool Signed>
- struct TEncodeInt {
- static void Do(IOutputStream& out, i64 val, bool desc) {
- EncodeSignedInt(out, val, desc);
- }
- };
- template <>
- struct TEncodeInt<false> {
- static void Do(IOutputStream& out, ui64 val, bool desc) {
- EncodeUnsignedInt(out, val, desc);
- }
- };
- template <typename T, bool Integral>
- struct TEncodeNumber {
- static void Do(IOutputStream& out, const T& val, bool desc) {
- TEncodeInt<std::is_signed<T>::value>::Do(out, val, desc);
- }
- };
- template <typename T>
- struct TEncodeNumber<T, false> {
- static void Do(IOutputStream& out, const T& val, bool desc) {
- EncodeFloating(out, val, desc);
- }
- };
- template <typename T, bool Arithmetic>
- struct TEncodeValue {
- static void Do(IOutputStream& out, const T& val, bool desc) {
- TEncodeNumber<T, std::is_integral<T>::value>::Do(out, val, desc);
- }
- };
- template <typename T>
- struct TEncodeValue<T, false> {
- static void Do(IOutputStream& out, TStringBuf str, bool desc) {
- EncodeString(out, str, desc);
- }
- };
- template <typename T>
- static void Encode(IOutputStream& out, const T& val, bool desc = false) {
- TEncodeValue<T, std::is_arithmetic<T>::value>::Do(out, val, desc);
- }
- template <typename T>
- inline void EncodeOptional(IOutputStream& out, const TMaybe<T>& val, bool desc = false) {
- NImpl::TBlock block;
- block.EncodeOptional(val.Defined(), desc);
- block.Put(out);
- if (val.Defined()) {
- Encode(out, *val, desc);
- }
- }
- template <typename T>
- static void Encode(IOutputStream& out, const TMaybe<T>& val, bool desc = false) {
- EncodeOptional(out, val, desc);
- }
- struct TResultOps {
- void SetError(const TString&) {
- return;
- }
- void SetSignedInt(i64) {
- return;
- }
- void SetUnsignedInt(ui64) {
- return;
- }
- void SetFloat(float) {
- return;
- }
- void SetDouble(double) {
- return;
- }
- void SetString(const TString&) {
- return;
- }
- void SetOptional(bool) {
- return;
- }
- };
- template <typename TResult>
- bool Decode(TResult& res, TStringBuf data) {
- static_assert(std::is_base_of<TResultOps, TResult>::value, "Result must be derived from NPresort::TResultOps");
- using namespace NImpl;
- TDecodeContext ctx;
- while (!data.empty()) {
- TBlock block;
- if (!block.Decode(ctx, data)) {
- res.SetError(ctx.Err);
- return false;
- }
- if (ctx.StrBlocks && ctx.Code != StringPart) {
- res.SetError("Unexpected integer");
- return false;
- }
- switch (ctx.Code) {
- case StringEnd:
- res.SetString(ctx.Str);
- ctx.Str = TString();
- break;
- case IntNeg:
- case IntNonNeg:
- res.SetSignedInt(ctx.SignedVal);
- ctx.SignedVal = 0;
- break;
- case Unsigned:
- res.SetUnsignedInt(ctx.UnsignedVal);
- ctx.UnsignedVal = 0;
- break;
- case Float:
- res.SetFloat(ctx.FloatVal);
- ctx.FloatVal = 0;
- break;
- case Double:
- res.SetDouble(ctx.DoubleVal);
- ctx.DoubleVal = 0;
- break;
- case Extension:
- if (ctx.Optional) {
- res.SetOptional(ctx.Filled);
- ctx.Optional = false;
- ctx.Filled = false;
- }
- break;
- default:
- break;
- }
- data.Skip(block.GetLen());
- }
- return true;
- }
- }
|