// extraencodings.cpp
  1. #include <util/system/defaults.h>
  2. #include <util/system/yassert.h>
  3. #include <library/cpp/charset/codepage.h>
  4. #include <util/generic/singleton.h>
  5. #include <util/generic/yexception.h>
  6. #include <library/cpp/charset/doccodes.h>
  7. #include "pire.h"
  8. namespace NPire {
  9. namespace {
  10. // A one-byte encoding which is capable of transforming upper half of the character
  11. // table to/from Unicode chars.
  12. class TOneByte: public TEncoding {
  13. public:
  14. TOneByte(ECharset doccode) {
  15. Table_ = CodePageByCharset(doccode)->unicode;
  16. for (size_t i = 0; i < 256; ++i)
  17. Reverse_.insert(std::make_pair(Table_[i], static_cast<char>(i)));
  18. }
  19. wchar32 FromLocal(const char*& begin, const char* end) const override {
  20. if (begin != end)
  21. return Table_[static_cast<unsigned char>(*begin++)];
  22. else
  23. ythrow yexception() << "EOF reached in Pire::OneByte::fromLocal()";
  24. }
  25. TString ToLocal(wchar32 c) const override {
  26. THashMap<wchar32, char>::const_iterator i = Reverse_.find(c);
  27. if (i != Reverse_.end())
  28. return TString(1, i->second);
  29. else
  30. return TString();
  31. }
  32. void AppendDot(TFsm& fsm) const override {
  33. fsm.AppendDot();
  34. }
  35. private:
  36. const wchar32* Table_;
  37. THashMap<wchar32, char> Reverse_;
  38. };
  39. template <unsigned N>
  40. struct TOneByteHelper: public TOneByte {
  41. inline TOneByteHelper()
  42. : TOneByte((ECharset)N)
  43. {
  44. }
  45. };
  46. }
  47. namespace NEncodings {
  48. const NPire::TEncoding& Koi8r() {
  49. return *Singleton<TOneByteHelper<CODES_KOI8>>();
  50. }
  51. const NPire::TEncoding& Cp1251() {
  52. return *Singleton<TOneByteHelper<CODES_WIN>>();
  53. }
  54. const NPire::TEncoding& Get(ECharset encoding) {
  55. switch (encoding) {
  56. case CODES_WIN:
  57. return Cp1251();
  58. case CODES_KOI8:
  59. return Koi8r();
  60. case CODES_ASCII:
  61. return NPire::NEncodings::Latin1();
  62. case CODES_UTF8:
  63. return NPire::NEncodings::Utf8();
  64. default:
  65. ythrow yexception() << "Pire::Encodings::get(ECharset): unknown encoding " << (int)encoding;
  66. }
  67. }
  68. }
  69. }