123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287 |
- #pragma once
- #include "common.h"
- #include <util/stream/output.h>
- namespace NUri {
- namespace NEncode {
- #define CHAR_TYPE_NAME(f) _ECT##f
- #define CHAR_TYPE_FLAG(f) ECF##f = 1u << CHAR_TYPE_NAME(f)
- enum ECharType {
- CHAR_TYPE_NAME(Digit),
- CHAR_TYPE_NAME(Lower),
- CHAR_TYPE_NAME(Upper),
- CHAR_TYPE_NAME(Unres),
- CHAR_TYPE_NAME(Stdrd),
- };
- enum ECharFlag {
- CHAR_TYPE_FLAG(Digit),
- CHAR_TYPE_FLAG(Lower),
- CHAR_TYPE_FLAG(Upper),
- CHAR_TYPE_FLAG(Unres),
- CHAR_TYPE_FLAG(Stdrd),
- // compound group flags
- ECGAlpha = ECFUpper | ECFLower,
- ECGAlnum = ECGAlpha | ECFDigit,
- ECGUnres = ECGAlnum | ECFUnres,
- ECGStdrd = ECGUnres | ECFStdrd,
- };
- #undef CHAR_TYPE_NAME
- #undef CHAR_TYPE_FLAG
- struct TCharFlags {
- ui32 TypeFlags;
- ui64 FeatFlags;
- ui32 DecodeFld; // decode if FeatureDecodeFieldAllowed
- ui32 EncodeFld; // encode if shouldn't be treated as delimiter
- TCharFlags(ui64 feat = 0)
- : TypeFlags(0)
- , FeatFlags(feat)
- , DecodeFld(0)
- , EncodeFld(0)
- {
- }
- TCharFlags(ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0)
- : TypeFlags(type)
- , FeatFlags(feat)
- , DecodeFld(decmask)
- , EncodeFld(encmask)
- {
- }
- TCharFlags& Add(const TCharFlags& val) {
- TypeFlags |= val.TypeFlags;
- FeatFlags |= val.FeatFlags;
- DecodeFld |= val.DecodeFld;
- EncodeFld |= val.EncodeFld;
- return *this;
- }
- bool IsAllowed(ui32 fldmask) const {
- return (TypeFlags & ECGUnres) || (DecodeFld & ~EncodeFld & fldmask);
- }
- // should we decode an encoded character
- bool IsDecode(ui32 fldmask, ui64 flags) const;
- };
- class TEncodeMapperBase {
- protected:
- TEncodeMapperBase()
- : Flags(0)
- , FldMask(0)
- , Q_DecodeAny(false)
- {
- }
- TEncodeMapperBase(ui64 flags, TField::EField fld)
- : Flags(flags)
- , FldMask(1u << fld)
- , Q_DecodeAny(flags & TFeature::FeatureDecodeANY)
- {
- }
- public:
- bool Is(TField::EField fld) const {
- return FldMask & (1u << fld);
- }
- protected:
- const ui64 Flags;
- const ui32 FldMask;
- const bool Q_DecodeAny; // this is a special option for username/password
- };
- // maps a sym or hex character and indicates whether it has to be encoded
- class TEncodeMapper
- : public TEncodeMapperBase {
- public:
- TEncodeMapper(ui64 flags, TField::EField fld = TField::FieldAllMAX)
- : TEncodeMapperBase(flags, fld)
- , Q_EncodeSpcAsPlus(flags & TFeature::FeatureEncodeSpaceAsPlus)
- {
- }
- // negative=sym, positive=hex, zero=maybesym
- int EncodeSym(unsigned char&) const;
- int EncodeHex(unsigned char&) const;
- protected:
- const bool Q_EncodeSpcAsPlus;
- };
- // indicates whether a character has to be encoded when copying to a field
- class TEncodeToMapper
- : public TEncodeMapperBase {
- public:
- TEncodeToMapper()
- : TEncodeMapperBase()
- {
- }
- TEncodeToMapper(ui64 flags, TField::EField fld = TField::FieldAllMAX)
- : TEncodeMapperBase(flags, fld)
- {
- }
- bool Enabled() const {
- return 0 != FldMask;
- }
- bool Encode(unsigned char) const;
- };
- class TEncoder {
- public:
- TEncoder(IOutputStream& out, const TEncodeMapper& fldsrc, const TEncodeToMapper& flddst = TEncodeToMapper());
- ui64 ReEncode(const TStringBuf& url);
- ui64 ReEncode(const char* str, size_t len) {
- return ReEncode(TStringBuf(str, len));
- }
- protected:
- static bool IsType(unsigned char c, ui64 flags) {
- return GetFlags(c).TypeFlags & flags;
- }
- public:
- static bool IsDigit(unsigned char c) {
- return IsType(c, ECFDigit);
- }
- static bool IsUpper(unsigned char c) {
- return IsType(c, ECFUpper);
- }
- static bool IsLower(unsigned char c) {
- return IsType(c, ECFLower);
- }
- static bool IsAlpha(unsigned char c) {
- return IsType(c, ECGAlpha);
- }
- static bool IsAlnum(unsigned char c) {
- return IsType(c, ECGAlnum);
- }
- static bool IsUnres(unsigned char c) {
- return IsType(c, ECGUnres);
- }
- static const TCharFlags& GetFlags(unsigned char c) {
- return Grammar().Get(c);
- }
- public:
- // process an encoded string, decoding safe chars and encoding unsafe
- static IOutputStream& ReEncode(IOutputStream& out, const TStringBuf& val, const TEncodeMapper& srcfld) {
- TEncoder(out, srcfld).ReEncode(val);
- return out;
- }
- static IOutputStream& ReEncodeTo(IOutputStream& out, const TStringBuf& val, const TEncodeMapper& srcfld, const TEncodeToMapper& dstfld) {
- TEncoder(out, srcfld, dstfld).ReEncode(val);
- return out;
- }
- // see also UrlUnescape() from string/quote.h
- static IOutputStream& Decode(
- IOutputStream& out, const TStringBuf& val, ui64 flags) {
- return ReEncode(out, val, flags | TFeature::FeatureDecodeANY);
- }
- public:
- // process a raw string or char, encode as needed
- static IOutputStream& Hex(IOutputStream& out, unsigned char val);
- static IOutputStream& Encode(IOutputStream& out, unsigned char val) {
- out << '%';
- return Hex(out, val);
- }
- static IOutputStream& EncodeAll(IOutputStream& out, const TStringBuf& val);
- static IOutputStream& EncodeNotAlnum(IOutputStream& out, const TStringBuf& val);
- static IOutputStream& EncodeField(IOutputStream& out, const TStringBuf& val, TField::EField fld);
- static IOutputStream& EncodeField(IOutputStream& out, const TStringBuf& val, TField::EField fld, ui64 flags);
- static IOutputStream& Encode(IOutputStream& out, const TStringBuf& val) {
- return EncodeField(out, val, TField::FieldAllMAX);
- }
- static IOutputStream& Encode(IOutputStream& out, const TStringBuf& val, ui64 flags) {
- return EncodeField(out, val, TField::FieldAllMAX, flags);
- }
- public:
- class TGrammar {
- TCharFlags Map_[256];
- public:
- TGrammar();
- const TCharFlags& Get(unsigned char ch) const {
- return Map_[ch];
- }
- TCharFlags& GetMutable(unsigned char ch) {
- return Map_[ch];
- }
- TCharFlags& Add(unsigned char ch, const TCharFlags& val) {
- return GetMutable(ch).Add(val);
- }
- void AddRng(unsigned char lo, unsigned char hi, const TCharFlags& val) {
- for (unsigned i = lo; i <= hi; ++i)
- Add(i, val);
- }
- void AddRng(unsigned char lo, unsigned char hi, ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0) {
- AddRng(lo, hi, TCharFlags(type, feat, decmask, encmask));
- }
- void Add(const TStringBuf& set, const TCharFlags& val) {
- for (size_t i = 0; i != set.length(); ++i)
- Add(set[i], val);
- }
- void Add(const TStringBuf& set, ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0) {
- Add(set, TCharFlags(type, feat, decmask, encmask));
- }
- };
- static const TGrammar& Grammar();
- protected:
- IOutputStream& Out;
- const TEncodeMapper FldSrc;
- const TEncodeToMapper FldDst;
- ui64 OutFlags;
- int HexValue;
- protected:
- void HexReset() {
- HexValue = 0;
- }
- void HexDigit(char c) {
- HexAdd(c - '0');
- }
- void HexUpper(char c) {
- HexAdd(c - 'A' + 10);
- }
- void HexLower(char c) {
- HexAdd(c - 'a' + 10);
- }
- void HexAdd(int val) {
- HexValue <<= 4;
- HexValue += val;
- }
- protected:
- void DoSym(unsigned char ch) {
- const int res = FldSrc.EncodeSym(ch);
- Do(ch, res);
- }
- void DoHex(unsigned char ch) {
- const int res = FldSrc.EncodeHex(ch);
- Do(ch, res);
- }
- void DoHex() {
- DoHex(HexValue);
- HexValue = 0;
- }
- void Do(unsigned char, int);
- };
- }
- using TEncoder = NEncode::TEncoder;
- }
|