123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081 |
- #include <util/system/defaults.h>
- #include <util/system/yassert.h>
- #include <library/cpp/charset/codepage.h>
- #include <util/generic/singleton.h>
- #include <util/generic/yexception.h>
- #include <library/cpp/charset/doccodes.h>
- #include "pire.h"
- namespace NPire {
- namespace {
- // A one-byte encoding which is capable of transforming upper half of the character
- // table to/from Unicode chars.
- class TOneByte: public TEncoding {
- public:
- TOneByte(ECharset doccode) {
- Table_ = CodePageByCharset(doccode)->unicode;
- for (size_t i = 0; i < 256; ++i)
- Reverse_.insert(std::make_pair(Table_[i], static_cast<char>(i)));
- }
- wchar32 FromLocal(const char*& begin, const char* end) const override {
- if (begin != end)
- return Table_[static_cast<unsigned char>(*begin++)];
- else
- ythrow yexception() << "EOF reached in Pire::OneByte::fromLocal()";
- }
- TString ToLocal(wchar32 c) const override {
- THashMap<wchar32, char>::const_iterator i = Reverse_.find(c);
- if (i != Reverse_.end())
- return TString(1, i->second);
- else
- return TString();
- }
- void AppendDot(TFsm& fsm) const override {
- fsm.AppendDot();
- }
- private:
- const wchar32* Table_;
- THashMap<wchar32, char> Reverse_;
- };
- template <unsigned N>
- struct TOneByteHelper: public TOneByte {
- inline TOneByteHelper()
- : TOneByte((ECharset)N)
- {
- }
- };
- }
- namespace NEncodings {
- const NPire::TEncoding& Koi8r() {
- return *Singleton<TOneByteHelper<CODES_KOI8>>();
- }
- const NPire::TEncoding& Cp1251() {
- return *Singleton<TOneByteHelper<CODES_WIN>>();
- }
- const NPire::TEncoding& Get(ECharset encoding) {
- switch (encoding) {
- case CODES_WIN:
- return Cp1251();
- case CODES_KOI8:
- return Koi8r();
- case CODES_ASCII:
- return NPire::NEncodings::Latin1();
- case CODES_UTF8:
- return NPire::NEncodings::Utf8();
- default:
- ythrow yexception() << "Pire::Encodings::get(ECharset): unknown encoding " << (int)encoding;
- }
- }
- }
- }
|