codecs_registry.cpp 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. #include "codecs_registry.h"
  2. #include "delta_codec.h"
  3. #include "huffman_codec.h"
  4. #include "pfor_codec.h"
  5. #include "solar_codec.h"
  6. #include "comptable_codec.h"
  7. #include "zstd_dict_codec.h"
  8. #include <library/cpp/blockcodecs/codecs.h>
  9. #include <util/string/builder.h>
  10. #include <util/string/cast.h>
  11. namespace NCodecs {
  12. TCodecPtr ICodec::GetInstance(TStringBuf name) {
  13. return Singleton<NPrivate::TCodecRegistry>()->GetCodec(name);
  14. }
  15. TVector<TString> ICodec::GetCodecsList() {
  16. return Singleton<NPrivate::TCodecRegistry>()->GetCodecsList();
  17. }
  18. namespace NPrivate {
  19. void TCodecRegistry::RegisterFactory(TFactoryPtr fac) {
  20. TVector<TString> names = fac->ListNames();
  21. for (const auto& name : names) {
  22. Y_ABORT_UNLESS(!Registry.contains(name), "already has %s", name.data());
  23. Registry[name] = fac;
  24. }
  25. }
  26. TCodecPtr TCodecRegistry::GetCodec(TStringBuf name) const {
  27. using namespace NPrivate;
  28. if (!name || "none" == name) {
  29. return nullptr;
  30. }
  31. if (TStringBuf::npos == name.find(':')) {
  32. Y_ENSURE_EX(Registry.contains(name), TNoCodecException(name));
  33. return Registry.find(name)->second->MakeCodec(name);
  34. } else {
  35. TPipelineCodec* pipe = new TPipelineCodec;
  36. do {
  37. TStringBuf v = name.NextTok(':');
  38. pipe->AddCodec(GetCodec(v));
  39. } while (name);
  40. return pipe;
  41. }
  42. }
  43. TVector<TString> TCodecRegistry::GetCodecsList() const {
  44. using namespace NPrivate;
  45. TVector<TString> vs;
  46. vs.push_back("none");
  47. for (const auto& it : Registry) {
  48. vs.push_back(it.first);
  49. }
  50. Sort(vs.begin(), vs.end());
  51. return vs;
  52. }
  53. struct TSolarCodecFactory : ICodecFactory {
  54. TCodecPtr MakeCodec(TStringBuf name) const override {
  55. if (TSolarCodec::MyNameShortInt() == name) {
  56. return new TSolarCodecShortInt();
  57. }
  58. if (TSolarCodec::MyName() == name) {
  59. return new TSolarCodec();
  60. }
  61. if (name.EndsWith(TStringBuf("-a"))) {
  62. return MakeCodecImpl<TAdaptiveSolarCodec>(name, name.SubStr(TSolarCodec::MyName().size()).Chop(2));
  63. } else {
  64. return MakeCodecImpl<TSolarCodec>(name, name.SubStr(TSolarCodec::MyName().size()));
  65. }
  66. }
  67. template <class TCodecCls>
  68. TCodecPtr MakeCodecImpl(const TStringBuf& name, const TStringBuf& type) const {
  69. if (TStringBuf("-8k") == type) {
  70. return new TCodecCls(1 << 13);
  71. }
  72. if (TStringBuf("-16k") == type) {
  73. return new TCodecCls(1 << 14);
  74. }
  75. if (TStringBuf("-32k") == type) {
  76. return new TCodecCls(1 << 15);
  77. }
  78. if (TStringBuf("-64k") == type) {
  79. return new TCodecCls(1 << 16);
  80. }
  81. if (TStringBuf("-256k") == type) {
  82. return new TCodecCls(1 << 18);
  83. }
  84. ythrow TNoCodecException(name);
  85. }
  86. TVector<TString> ListNames() const override {
  87. TVector<TString> vs;
  88. vs.push_back(ToString(TSolarCodec::MyName()));
  89. vs.push_back(ToString(TSolarCodec::MyName8k()));
  90. vs.push_back(ToString(TSolarCodec::MyName16k()));
  91. vs.push_back(ToString(TSolarCodec::MyName32k()));
  92. vs.push_back(ToString(TSolarCodec::MyName64k()));
  93. vs.push_back(ToString(TSolarCodec::MyName256k()));
  94. vs.push_back(ToString(TSolarCodec::MyName8kAdapt()));
  95. vs.push_back(ToString(TSolarCodec::MyName16kAdapt()));
  96. vs.push_back(ToString(TSolarCodec::MyName32kAdapt()));
  97. vs.push_back(ToString(TSolarCodec::MyName64kAdapt()));
  98. vs.push_back(ToString(TSolarCodec::MyName256kAdapt()));
  99. vs.push_back(ToString(TSolarCodec::MyNameShortInt()));
  100. return vs;
  101. }
  102. };
  103. struct TZStdDictCodecFactory : ICodecFactory {
  104. TCodecPtr MakeCodec(TStringBuf name) const override {
  105. return new TZStdDictCodec(TZStdDictCodec::ParseCompressionName(name));
  106. }
  107. TVector<TString> ListNames() const override {
  108. return TZStdDictCodec::ListCompressionNames();
  109. }
  110. };
  111. struct TCompTableCodecFactory : ICodecFactory {
  112. TCodecPtr MakeCodec(TStringBuf name) const override {
  113. if (TCompTableCodec::MyNameHQ() == name) {
  114. return new TCompTableCodec(TCompTableCodec::Q_HIGH);
  115. } else if (TCompTableCodec::MyNameLQ() == name) {
  116. return new TCompTableCodec(TCompTableCodec::Q_LOW);
  117. } else {
  118. Y_ENSURE_EX(false, TNoCodecException(name));
  119. return nullptr;
  120. }
  121. }
  122. TVector<TString> ListNames() const override {
  123. TVector<TString> vs;
  124. vs.push_back(ToString(TCompTableCodec::MyNameHQ()));
  125. vs.push_back(ToString(TCompTableCodec::MyNameLQ()));
  126. return vs;
  127. }
  128. };
  129. struct TBlockCodec : ICodec {
  130. const NBlockCodecs::ICodec* Codec;
  131. TBlockCodec(TStringBuf name)
  132. : Codec(NBlockCodecs::Codec(name))
  133. {
  134. }
  135. TString GetName() const override {
  136. return ToString(Codec->Name());
  137. }
  138. ui8 Encode(TStringBuf r, TBuffer& b) const override {
  139. Codec->Encode(r, b);
  140. return 0;
  141. }
  142. void Decode(TStringBuf r, TBuffer& b) const override {
  143. // TODO: throws exception that is not TCodecException
  144. Codec->Decode(r, b);
  145. }
  146. protected:
  147. void DoLearn(ISequenceReader&) override {
  148. }
  149. };
  150. struct TBlockCodecsFactory : ICodecFactory {
  151. using TRegistry = THashMap<TString, TCodecPtr>;
  152. TRegistry Registry;
  153. TBlockCodecsFactory() {
  154. for (TStringBuf codec : NBlockCodecs::ListAllCodecs()) {
  155. Register(codec);
  156. }
  157. }
  158. void Register(TStringBuf name) {
  159. TCodecPtr p = Registry[name] = new TBlockCodec(name);
  160. Registry[p->GetName()] = p;
  161. }
  162. TCodecPtr MakeCodec(TStringBuf name) const override {
  163. if (!Registry.contains(name)) {
  164. ythrow TNoCodecException(name);
  165. }
  166. return Registry.find(name)->second;
  167. }
  168. TVector<TString> ListNames() const override {
  169. TVector<TString> res;
  170. for (const auto& it : Registry) {
  171. res.push_back(it.first);
  172. }
  173. return res;
  174. }
  175. };
  176. TCodecRegistry::TCodecRegistry() {
  177. RegisterFactory(new TInstanceFactory<TTrivialCodec>);
  178. RegisterFactory(new TInstanceFactory<TTrivialTrainableCodec>);
  179. RegisterFactory(new TInstanceFactory<THuffmanCodec>);
  180. RegisterFactory(new TInstanceFactory<TPForCodec<ui64, true>>);
  181. RegisterFactory(new TInstanceFactory<TPForCodec<ui32, true>>);
  182. RegisterFactory(new TSolarCodecFactory);
  183. RegisterFactory(new TZStdDictCodecFactory);
  184. RegisterFactory(new TCompTableCodecFactory);
  185. RegisterFactory(new TBlockCodecsFactory);
  186. }
  187. }
  188. void RegisterCodecFactory(TCodecFactoryPtr fact) {
  189. Singleton<NPrivate::TCodecRegistry>()->RegisterFactory(fact);
  190. }
  191. }