Browse Source

Restoring authorship annotation for <kerzum@yandex-team.ru>. Commit 2 of 2.

kerzum 3 years ago
parent
commit
47a7e7b296

+ 17 - 17
library/cpp/charset/codepage.cpp

@@ -23,11 +23,11 @@
 using namespace NCodepagePrivate;
 
 void Recoder::Create(const CodePage& source, const CodePage& target) {
-    const Encoder* wideTarget = &EncoderByCharset(target.CPEnum); 
+    const Encoder* wideTarget = &EncoderByCharset(target.CPEnum);
     Create(source, wideTarget);
 }
 void Recoder::Create(const CodePage& page, wchar32 (*mapfunc)(wchar32)) {
-    const Encoder* widePage = &EncoderByCharset(page.CPEnum); 
+    const Encoder* widePage = &EncoderByCharset(page.CPEnum);
     Create(page, widePage, mapfunc);
 }
 
@@ -167,7 +167,7 @@ public:
         const char* name;
 
         for (size_t i = 0; i != CODES_MAX; ++i) {
-            ECharset e = static_cast<ECharset>(i); 
+            ECharset e = static_cast<ECharset>(i);
             const CodePage* page = Singleton<NCodepagePrivate::TCodepagesMap>()->GetPrivate(e);
 
             AddName(ToString(static_cast<int>(i)), e);
@@ -193,7 +193,7 @@ public:
 };
 
 ECharset CharsetByName(TStringBuf name) {
-    return Singleton<TCodePageHash>()->CharsetByName(name); 
+    return Singleton<TCodePageHash>()->CharsetByName(name);
 }
 
 ECharset CharsetByNameOrDie(TStringBuf name) {
@@ -203,7 +203,7 @@ ECharset CharsetByNameOrDie(TStringBuf name) {
     return result;
 }
 
-template <typename TxChar> 
+template <typename TxChar>
 static inline RECODE_RESULT utf8_read_rune_from_unknown_plane(TxChar& rune, size_t& rune_len, const TxChar* s, const TxChar* end) {
     if ((*s & 0xFF00) != 0xF000) {
         rune_len = 1;
@@ -251,17 +251,17 @@ static inline RECODE_RESULT utf8_read_rune_from_unknown_plane(TxChar& rune, size
 }
 
 template <typename TxChar>
-void DoDecodeUnknownPlane(TxChar* str, TxChar*& ee, const ECharset enc) { 
-    TxChar* e = ee; 
+void DoDecodeUnknownPlane(TxChar* str, TxChar*& ee, const ECharset enc) {
+    TxChar* e = ee;
     if (SingleByteCodepage(enc)) {
-        const CodePage* cp = CodePageByCharset(enc); 
-        for (TxChar* s = str; s < e; s++) { 
+        const CodePage* cp = CodePageByCharset(enc);
+        for (TxChar* s = str; s < e; s++) {
             if (Hi8(Lo16(*s)) == 0xF0)
                 *s = (TxChar)cp->unicode[Lo8(Lo16(*s))]; // NOT mb compliant
         }
     } else if (enc == CODES_UTF8) {
-        TxChar* s; 
-        TxChar* d; 
+        TxChar* s;
+        TxChar* d;
 
         for (s = d = str; s < e;) {
             size_t l = 0;
@@ -275,7 +275,7 @@ void DoDecodeUnknownPlane(TxChar* str, TxChar*& ee, const ECharset enc) {
         }
         e = d;
     } else if (enc == CODES_UNKNOWN) {
-        for (TxChar* s = str; s < e; s++) { 
+        for (TxChar* s = str; s < e; s++) {
             if (Hi8(Lo16(*s)) == 0xF0)
                 *s = Lo8(Lo16(*s));
         }
@@ -312,11 +312,11 @@ void DoDecodeUnknownPlane(TxChar* str, TxChar*& ee, const ECharset enc) {
 }
 
 void DecodeUnknownPlane(wchar16* str, wchar16*& ee, const ECharset enc) {
-    DoDecodeUnknownPlane(str, ee, enc); 
+    DoDecodeUnknownPlane(str, ee, enc);
 }
 void DecodeUnknownPlane(wchar32* str, wchar32*& ee, const ECharset enc) {
-    DoDecodeUnknownPlane(str, ee, enc); 
-} 
+    DoDecodeUnknownPlane(str, ee, enc);
+}
 
 namespace {
     class THashSetType: public THashSet<TString> {
@@ -471,7 +471,7 @@ public:
     }
 };
 
-ECharset EncodingHintByName(const char* encname) { 
+ECharset EncodingHintByName(const char* encname) {
     if (!encname)
         return CODES_UNKNOWN; // safety check
 
@@ -501,7 +501,7 @@ ECharset EncodingHintByName(const char* encname) {
 
     NormalizeEncodingPrefixes(enc);
 
-    ECharset hint = CharsetByName(enc.c_str()); 
+    ECharset hint = CharsetByName(enc.c_str());
     if (hint != CODES_UNKNOWN)
         return hint;
 

+ 20 - 20
library/cpp/charset/codepage.h

@@ -102,7 +102,7 @@ namespace NCodepagePrivate {
         const CodePage* Data[DataSize];
 
     private:
-        inline const CodePage* GetPrivate(ECharset e) const { 
+        inline const CodePage* GetPrivate(ECharset e) const {
             Y_ASSERT(e + DataShift >= 0 && e + DataShift < DataSize);
             return Data[e + DataShift];
         }
@@ -112,7 +112,7 @@ namespace NCodepagePrivate {
     public:
         TCodepagesMap();
 
-        inline const CodePage* Get(ECharset e) const { 
+        inline const CodePage* Get(ECharset e) const {
             const CodePage* res = GetPrivate(e);
             if (!res->SingleByteCodepage()) {
                 ythrow yexception() << "CodePage (" << (int)e << ") structure can only be used for single byte encodings";
@@ -121,13 +121,13 @@ namespace NCodepagePrivate {
             return res;
         }
 
-        inline bool SingleByteCodepage(ECharset e) const { 
+        inline bool SingleByteCodepage(ECharset e) const {
             return GetPrivate(e)->SingleByteCodepage();
         }
-        inline bool NativeCodepage(ECharset e) const { 
+        inline bool NativeCodepage(ECharset e) const {
             return GetPrivate(e)->NativeCodepage();
         }
-        inline const char* NameByCharset(ECharset e) const { 
+        inline const char* NameByCharset(ECharset e) const {
             return GetPrivate(e)->Names[0];
         }
 
@@ -136,20 +136,20 @@ namespace NCodepagePrivate {
         friend class ::TCodePageHash;
     };
 
-    inline bool NativeCodepage(ECharset e) { 
+    inline bool NativeCodepage(ECharset e) {
         return ::NCodepagePrivate::TCodepagesMap::Instance().NativeCodepage(e);
     }
 }
 
-inline bool SingleByteCodepage(ECharset e) { 
+inline bool SingleByteCodepage(ECharset e) {
     return ::NCodepagePrivate::TCodepagesMap::Instance().SingleByteCodepage(e);
 }
 
-inline bool ValidCodepage(ECharset e) { 
+inline bool ValidCodepage(ECharset e) {
     return e >= 0 && e < CODES_MAX;
 }
 
-inline const CodePage* CodePageByCharset(ECharset e) { 
+inline const CodePage* CodePageByCharset(ECharset e) {
     return ::NCodepagePrivate::TCodepagesMap::Instance().Get(e);
 }
 
@@ -158,11 +158,11 @@ ECharset CharsetByName(TStringBuf name);
 // Same as CharsetByName, but throws yexception() if name is invalid
 ECharset CharsetByNameOrDie(TStringBuf name);
 
-inline ECharset CharsetByCodePage(const CodePage* CP) { 
+inline ECharset CharsetByCodePage(const CodePage* CP) {
     return CP->CPEnum;
 }
 
-inline const char* NameByCharset(ECharset e) { 
+inline const char* NameByCharset(ECharset e) {
     return ::NCodepagePrivate::TCodepagesMap::Instance().NameByCharset(e);
 }
 
@@ -178,14 +178,14 @@ inline const char* NameByCodePage(const CodePage* CP) {
 }
 
 inline const CodePage* CodePageByName(const char* name) {
-    ECharset code = CharsetByName(name); 
+    ECharset code = CharsetByName(name);
     if (code == CODES_UNKNOWN)
         return nullptr;
 
-    return CodePageByCharset(code); 
+    return CodePageByCharset(code);
 }
 
-ECharset EncodingHintByName(const char* name); 
+ECharset EncodingHintByName(const char* name);
 
 /*****************************************************************\
 *                    struct Encoder                               *
@@ -243,7 +243,7 @@ struct Recoder {
 
 extern const struct Encoder& WideCharToYandex;
 
-const Encoder& EncoderByCharset(ECharset enc); 
+const Encoder& EncoderByCharset(ECharset enc);
 
 namespace NCodepagePrivate {
     class TCodePageData {
@@ -260,13 +260,13 @@ namespace NCodepagePrivate {
 
         friend struct ::CodePage;
         friend class TCodepagesMap;
-        friend RECODE_RESULT _recodeToYandex(ECharset, const char*, char*, size_t, size_t, size_t&, size_t&); 
-        friend RECODE_RESULT _recodeFromYandex(ECharset, const char*, char*, size_t, size_t, size_t&, size_t&); 
-        friend const Encoder& ::EncoderByCharset(ECharset enc); 
+        friend RECODE_RESULT _recodeToYandex(ECharset, const char*, char*, size_t, size_t, size_t&, size_t&);
+        friend RECODE_RESULT _recodeFromYandex(ECharset, const char*, char*, size_t, size_t, size_t&, size_t&);
+        friend const Encoder& ::EncoderByCharset(ECharset enc);
     };
 }
 
-inline const Encoder& EncoderByCharset(ECharset enc) { 
+inline const Encoder& EncoderByCharset(ECharset enc) {
     if (!SingleByteCodepage(enc)) {
         ythrow yexception() << "Encoder structure can only be used for single byte encodings";
     }
@@ -286,7 +286,7 @@ inline unsigned char CodePage::ToTitle(unsigned char ch) const {
 
 extern const CodePage& csYandex;
 
-/// these functions change (lowers) [end] position in case of utf-8 
+/// these functions change (lowers) [end] position in case of utf-8
 /// null character is NOT assumed or written at [*end]
 void DecodeUnknownPlane(wchar16* start, wchar16*& end, const ECharset enc4unk);
 void DecodeUnknownPlane(wchar32* start, wchar32*& end, const ECharset enc4unk);

+ 17 - 17
library/cpp/charset/codepage_ut.cpp

@@ -56,7 +56,7 @@ public:
     void TestCanEncode();
 
     inline void TestUpperLower() {
-        const CodePage* cp = CodePageByCharset(CODES_ASCII); 
+        const CodePage* cp = CodePageByCharset(CODES_ASCII);
         char tmp[100];
 
         TStringBuf s = "abcde";
@@ -161,13 +161,13 @@ void TCodepageTest::TestUTF() {
 }
 
 void TCodepageTest::TestBrokenMultibyte() {
-    const ECharset cp = CODES_EUC_JP; 
- 
+    const ECharset cp = CODES_EUC_JP;
+
     const char sampletext[] = {'\xe3'};
     wchar32 recodeResult[100];
- 
-    size_t nwritten = 0; 
-    size_t nread = 0; 
+
+    size_t nwritten = 0;
+    size_t nread = 0;
 
     RECODE_RESULT res = RecodeToUnicode(cp, sampletext, recodeResult, Y_ARRAY_SIZE(sampletext), Y_ARRAY_SIZE(recodeResult), nread, nwritten);
     UNIT_ASSERT(res == RECODE_OK);
@@ -178,8 +178,8 @@ void TCodepageTest::TestBrokenMultibyte() {
     res = RecodeToUnicode(cp, bigSample, recodeResult, Y_ARRAY_SIZE(bigSample), Y_ARRAY_SIZE(recodeResult), nread, nwritten);
     UNIT_ASSERT(res == RECODE_OK);
     UNIT_ASSERT(nread == Y_ARRAY_SIZE(bigSample));
-} 
- 
+}
+
 void TCodepageTest::TestUTFFromUnknownPlane() {
     static const wchar32 sampletext[] = {0x61, 0x62, 0x63, 0x20,
                                          0x430, 0x431, 0x432, 0x20,
@@ -250,21 +250,21 @@ void TCodepageTest::TestUTFFromUnknownPlane() {
     }
     UNIT_ASSERT(0x1234 == charbuffer[samplelen]);
     UNIT_ASSERT(0x5678 == charbuffer[2 * samplelen + 1]);
- 
-    // test TChar version 
-    // bytebuffer of len writtenbytes contains sampletext of len samplelen chars in utf8 
+
+    // test TChar version
+    // bytebuffer of len writtenbytes contains sampletext of len samplelen chars in utf8
     TUtf16String wtr = CharToWide(TStringBuf(bytebuffer, writtenbytes), CODES_UNKNOWNPLANE);
     TChar* strend = wtr.begin() + wtr.size();
-    DecodeUnknownPlane(wtr.begin(), strend, CODES_UTF8); 
+    DecodeUnknownPlane(wtr.begin(), strend, CODES_UTF8);
     wtr.resize(strend - wtr.data(), 'Q');
     UNIT_ASSERT_VALUES_EQUAL(wtr.size(), samplelen);
     for (size_t i = 0; i < wtr.size(); ++i) {
-        if (sampletext[i] >= 0x10000) { 
-            UNIT_ASSERT_VALUES_EQUAL(wtr[i], ' '); 
+        if (sampletext[i] >= 0x10000) {
+            UNIT_ASSERT_VALUES_EQUAL(wtr[i], ' ');
         } else {
-            UNIT_ASSERT_VALUES_EQUAL(wtr[i], sampletext[i]); 
-        } 
-    } 
+            UNIT_ASSERT_VALUES_EQUAL(wtr[i], sampletext[i]);
+        }
+    }
 }
 
 static void TestSurrogates(const char* str, const wchar16* wide, size_t wideSize) {

+ 55 - 55
library/cpp/charset/doccodes.h

@@ -40,66 +40,66 @@ enum ECharset {
     CODES_WINDOWS_1255, // [31] for Hebrew
     CODES_WINDOWS_1256, // [32] for Arabic
     CODES_WINDOWS_1257, // [33] for Estonian, Latvian and Lithuanian
- 
-    // these codes are all the other 8bit codes known by libiconv 
-    // they follow in alphanumeric order 
-    CODES_CP1046, 
-    CODES_CP1124, 
-    CODES_CP1125, 
-    CODES_CP1129, 
-    CODES_CP1131, 
-    CODES_CP1133, 
+
+    // these codes are all the other 8bit codes known by libiconv
+    // they follow in alphanumeric order
+    CODES_CP1046,
+    CODES_CP1124,
+    CODES_CP1125,
+    CODES_CP1129,
+    CODES_CP1131,
+    CODES_CP1133,
     CODES_CP1161, // [40]
-    CODES_CP1162, 
-    CODES_CP1163, 
-    CODES_CP1258, 
-    CODES_CP437, 
-    CODES_CP737, 
-    CODES_CP775, 
-    CODES_CP850, 
-    CODES_CP852, 
-    CODES_CP853, 
+    CODES_CP1162,
+    CODES_CP1163,
+    CODES_CP1258,
+    CODES_CP437,
+    CODES_CP737,
+    CODES_CP775,
+    CODES_CP850,
+    CODES_CP852,
+    CODES_CP853,
     CODES_CP856, // [50]
-    CODES_CP857, 
-    CODES_CP858, 
-    CODES_CP860, 
-    CODES_CP861, 
-    CODES_CP862, 
-    CODES_CP863, 
-    CODES_CP864, 
-    CODES_CP865, 
-    CODES_CP869, 
+    CODES_CP857,
+    CODES_CP858,
+    CODES_CP860,
+    CODES_CP861,
+    CODES_CP862,
+    CODES_CP863,
+    CODES_CP864,
+    CODES_CP865,
+    CODES_CP869,
     CODES_CP874, // [60]
-    CODES_CP922, 
-    CODES_HP_ROMAN8, 
-    CODES_ISO646_CN, 
-    CODES_ISO646_JP, 
-    CODES_ISO8859_10, 
-    CODES_ISO8859_11, 
-    CODES_ISO8859_14, 
-    CODES_JISX0201, 
-    CODES_KOI8_T, 
+    CODES_CP922,
+    CODES_HP_ROMAN8,
+    CODES_ISO646_CN,
+    CODES_ISO646_JP,
+    CODES_ISO8859_10,
+    CODES_ISO8859_11,
+    CODES_ISO8859_14,
+    CODES_JISX0201,
+    CODES_KOI8_T,
     CODES_MAC_ARABIC, // [70]
-    CODES_MAC_CENTRALEUROPE, 
-    CODES_MAC_CROATIAN, 
-    CODES_MAC_GREEK, 
-    CODES_MAC_HEBREW, 
-    CODES_MAC_ICELAND, 
-    CODES_MAC_ROMANIA, 
-    CODES_MAC_ROMAN, 
-    CODES_MAC_THAI, 
-    CODES_MAC_TURKISH, 
+    CODES_MAC_CENTRALEUROPE,
+    CODES_MAC_CROATIAN,
+    CODES_MAC_GREEK,
+    CODES_MAC_HEBREW,
+    CODES_MAC_ICELAND,
+    CODES_MAC_ROMANIA,
+    CODES_MAC_ROMAN,
+    CODES_MAC_THAI,
+    CODES_MAC_TURKISH,
     CODES_RESERVED_2, // [80] reserved code: use it for new encodings before adding them to the end of the list
-    CODES_MULELAO, 
-    CODES_NEXTSTEP, 
-    CODES_PT154, 
-    CODES_RISCOS_LATIN1, 
-    CODES_RK1048, 
-    CODES_TCVN, 
-    CODES_TDS565, 
-    CODES_TIS620, 
-    CODES_VISCII, 
- 
+    CODES_MULELAO,
+    CODES_NEXTSTEP,
+    CODES_PT154,
+    CODES_RISCOS_LATIN1,
+    CODES_RK1048,
+    CODES_TCVN,
+    CODES_TDS565,
+    CODES_TIS620,
+    CODES_VISCII,
+
     // libiconv multibyte codepages
     CODES_BIG5, // [90]
     CODES_BIG5_HKSCS,

File diff suppressed because it is too large
+ 242 - 242
library/cpp/charset/generated/cp_data.cpp


File diff suppressed because it is too large
+ 326 - 887
library/cpp/charset/generated/encrec_data.cpp


+ 3 - 3
library/cpp/charset/iconv.h

@@ -7,11 +7,11 @@
 // WARNING: Do not use this functions - use functions from wide.h or recyr.hh instead.
 
 namespace NICONVPrivate {
-    inline const char* CharsetName(ECharset code) { 
-        return NameByCharset(code); 
+    inline const char* CharsetName(ECharset code) {
+        return NameByCharset(code);
     }
 
-    inline const char* CharsetName(const char* code) { 
+    inline const char* CharsetName(const char* code) {
         return code;
     }
 

+ 2 - 2
library/cpp/charset/wide.cpp

@@ -1,5 +1,5 @@
-#include "wide.h" 
- 
+#include "wide.h"
+
 bool CanBeEncoded(TWtringBuf text, ECharset encoding) {
     const size_t LEN = 16;
     const size_t BUFSIZE = LEN * 4;

+ 4 - 4
library/cpp/charset/wide.h

@@ -1,5 +1,5 @@
 #pragma once
- 
+
 #include "codepage.h"
 #include "iconv.h"
 
@@ -22,7 +22,7 @@ inline size_t WideToChar(const TCharType* text, size_t len, char* dest, ECharset
 
     const char* start = dest;
 
-    const Encoder* const encoder = &EncoderByCharset(enc); 
+    const Encoder* const encoder = &EncoderByCharset(enc);
     const TCharType* const last = text + len;
     for (const TCharType* cur = text; cur != last; ++dest) {
         *dest = encoder->Tr(ReadSymbolAndAdvance(cur, last));
@@ -257,7 +257,7 @@ inline TUtf16String CharToWide(const char* text, size_t len, ECharset enc) {
         if (enc == CODES_UTF8)
             return UTF8ToWide<robust>(text, len);
 
-        return CharToWide(text, len, *CodePageByCharset(enc)); 
+        return CharToWide(text, len, *CodePageByCharset(enc));
     }
 
     TUtf16String w = TUtf16String::Uninitialized(len * 2);
@@ -300,7 +300,7 @@ inline TUtf16String CharToWide(const TStringBuf s, ECharset enc) {
 inline TUtf16String CharToWide(const TStringBuf s, const CodePage& cp) {
     return CharToWide(s.data(), s.size(), cp);
 }
- 
+
 // true if @text can be fully encoded to specified @encoding,
 // with possibility to recover exact original text after decoding
 bool CanBeEncoded(TWtringBuf text, ECharset encoding);

+ 3 - 3
library/cpp/charset/wide_ut.cpp

@@ -343,7 +343,7 @@ void Out<RECODE_RESULT>(IOutputStream& out, RECODE_RESULT val) {
 
 void TConversionTest::TestRecode() {
     for (int c = 0; c != CODES_MAX; ++c) {
-        ECharset enc = static_cast<ECharset>(c); 
+        ECharset enc = static_cast<ECharset>(c);
         if (!SingleByteCodepage(enc))
             continue;
 
@@ -385,11 +385,11 @@ void TConversionTest::TestRecode() {
 
 void TConversionTest::TestUnicodeLimit() {
     for (int i = 0; i != CODES_MAX; ++i) {
-        ECharset code = static_cast<ECharset>(i); 
+        ECharset code = static_cast<ECharset>(i);
         if (!SingleByteCodepage(code))
             continue;
 
-        const CodePage* page = CodePageByCharset(code); 
+        const CodePage* page = CodePageByCharset(code);
         Y_ASSERT(page);
 
         for (int c = 0; c < 256; ++c) {

Some files were not shown because too many files changed in this diff