descriptor.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332
  1. #include "descriptor.h"
  2. #include <library/cpp/json/json_reader.h>
  3. #include <library/cpp/json/json_writer.h>
  4. #include <library/cpp/protobuf/dynamic_prototype/dynamic_prototype.h>
  5. #include <library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.h>
  6. #include <library/cpp/protobuf/json/json2proto.h>
  7. #include <library/cpp/protobuf/json/proto2json.h>
  8. #include <library/cpp/string_utils/base64/base64.h>
  9. #include <util/generic/hash.h>
  10. #include <util/generic/queue.h>
  11. #include <util/generic/set.h>
  12. #include <util/generic/vector.h>
  13. #include <util/stream/mem.h>
  14. #include <util/stream/str.h>
  15. #include <util/stream/zlib.h>
  16. #include <util/string/cast.h>
  17. #include <google/protobuf/text_format.h>
  18. #include <google/protobuf/io/zero_copy_stream_impl_lite.h>
  19. #include <google/protobuf/io/coded_stream.h>
  20. using namespace NProtoBuf;
  21. static TString SerializeFileDescriptorSet(const FileDescriptorSet& proto) {
  22. const auto size = proto.ByteSize();
  23. TTempBuf data(size);
  24. proto.SerializeWithCachedSizesToArray((ui8*)data.Data());
  25. TStringStream str;
  26. {
  27. TZLibCompress comp(&str, ZLib::GZip);
  28. comp.Write(data.Data(), size);
  29. }
  30. return str.Str();
  31. }
  32. static bool ParseFileDescriptorSet(const TStringBuf& data, FileDescriptorSet* proto) {
  33. TMemoryInput input(data.data(), data.size());
  34. TString buf = TZLibDecompress(&input).ReadAll();
  35. if (!proto->ParseFromArray(buf.data(), buf.size())) {
  36. return false;
  37. }
  38. return true;
  39. }
  40. TDynamicInfo::TDynamicInfo(TDynamicPrototypePtr dynamicPrototype)
  41. : DynamicPrototype(dynamicPrototype)
  42. , SkipBytes_(0)
  43. {
  44. }
  45. TDynamicInfo::~TDynamicInfo() {
  46. }
  47. TDynamicInfoRef TDynamicInfo::Create(const TStringBuf& typeConfig) {
  48. auto data = ParseTypeConfig(typeConfig);
  49. const TString& meta = Base64Decode(data.Metadata);
  50. const TString& name = data.MessageName;
  51. FileDescriptorSet set;
  52. if (!ParseFileDescriptorSet(meta, &set)) {
  53. ythrow yexception() << "can't parse metadata";
  54. }
  55. auto info = MakeIntrusive<TDynamicInfo>(TDynamicPrototype::Create(set, name, true));
  56. info->EnumFormat_ = data.EnumFormat;
  57. info->ProtoFormat_ = data.ProtoFormat;
  58. info->Recursion_ = data.Recursion;
  59. info->YtMode_ = data.YtMode;
  60. info->SkipBytes_ = data.SkipBytes;
  61. info->OptionalLists_ = data.OptionalLists;
  62. info->SyntaxAware_ = data.SyntaxAware;
  63. info->Deterministic_ = data.Deterministic;
  64. return info;
  65. }
  66. const Descriptor* TDynamicInfo::Descriptor() const {
  67. return DynamicPrototype->GetDescriptor();
  68. }
  69. EEnumFormat TDynamicInfo::GetEnumFormat() const {
  70. return EnumFormat_;
  71. }
  72. ERecursionTraits TDynamicInfo::GetRecursionTraits() const {
  73. return Recursion_;
  74. }
  75. bool TDynamicInfo::GetYtMode() const {
  76. return YtMode_;
  77. }
  78. bool TDynamicInfo::GetOptionalLists() const {
  79. return OptionalLists_;
  80. }
  81. bool TDynamicInfo::GetSyntaxAware() const {
  82. return SyntaxAware_;
  83. }
  84. TAutoPtr<Message> TDynamicInfo::MakeProto() {
  85. return DynamicPrototype->CreateUnsafe();
  86. }
  87. TAutoPtr<Message> TDynamicInfo::Parse(const TStringBuf& data) {
  88. auto mut = MakeProto();
  89. TStringBuf tmp(data);
  90. if (SkipBytes_) {
  91. tmp = TStringBuf(tmp.data() + SkipBytes_, tmp.size() - SkipBytes_);
  92. }
  93. switch (ProtoFormat_) {
  94. case PF_PROTOBIN: {
  95. if (!mut->ParseFromArray(tmp.data(), tmp.size())) {
  96. ythrow yexception() << "can't parse protobin message";
  97. }
  98. break;
  99. }
  100. case PF_PROTOTEXT: {
  101. io::ArrayInputStream si(tmp.data(), tmp.size());
  102. if (!TextFormat::Parse(&si, mut.Get())) {
  103. ythrow yexception() << "can't parse prototext message";
  104. }
  105. break;
  106. }
  107. case PF_JSON: {
  108. NJson::TJsonValue value;
  109. if (NJson::ReadJsonFastTree(tmp, &value)) {
  110. NProtobufJson::Json2Proto(value, *mut);
  111. } else {
  112. ythrow yexception() << "can't parse json value";
  113. }
  114. break;
  115. }
  116. }
  117. return mut;
  118. }
  119. TString TDynamicInfo::Serialize(const Message& proto) {
  120. TString result;
  121. switch (ProtoFormat_) {
  122. case PF_PROTOBIN: {
  123. result.ReserveAndResize(proto.ByteSize());
  124. bool success = false;
  125. if (Deterministic_) {
  126. io::ArrayOutputStream arrOut(result.begin(), result.size());
  127. io::CodedOutputStream codedOut(&arrOut);
  128. codedOut.SetSerializationDeterministic(true);
  129. success = proto.SerializeToCodedStream(&codedOut);
  130. } else {
  131. success = proto.SerializeToArray(result.begin(), result.size());
  132. }
  133. if (!success) {
  134. ythrow yexception() << "can't serialize protobin message";
  135. }
  136. break;
  137. }
  138. case PF_PROTOTEXT: {
  139. if (!TextFormat::PrintToString(proto, &result)) {
  140. ythrow yexception() << "can't serialize prototext message";
  141. }
  142. break;
  143. }
  144. case PF_JSON: {
  145. NJson::TJsonValue value;
  146. NProtobufJson::TProto2JsonConfig config;
  147. config.SetSortMapKeys(Deterministic_);
  148. NProtobufJson::Proto2Json(proto, value, config);
  149. result = NJson::WriteJson(value);
  150. break;
  151. }
  152. }
  153. return result;
  154. }
  155. TString GenerateProtobufTypeConfig(
  156. const Descriptor* descriptor,
  157. const TProtoTypeConfigOptions& options) {
  158. NJson::TJsonValue ret(NJson::JSON_MAP);
  159. ret["name"] = descriptor->full_name();
  160. ret["meta"] = Base64Encode(
  161. SerializeFileDescriptorSet(GenerateFileDescriptorSet(descriptor)));
  162. if (options.SkipBytes > 0) {
  163. ret["skip"] = options.SkipBytes;
  164. }
  165. switch (options.ProtoFormat) {
  166. case PF_PROTOBIN:
  167. break;
  168. case PF_PROTOTEXT:
  169. ret["format"] = "prototext";
  170. break;
  171. case PF_JSON:
  172. ret["format"] = "json";
  173. break;
  174. }
  175. if (!options.OptionalLists) {
  176. ret["lists"]["optional"] = false;
  177. }
  178. if (options.SyntaxAware) {
  179. ret["syntax"]["aware"] = options.SyntaxAware;
  180. }
  181. switch (options.EnumFormat) {
  182. case EEnumFormat::Number:
  183. break;
  184. case EEnumFormat::Name:
  185. ret["view"]["enum"] = "name";
  186. break;
  187. case EEnumFormat::FullName:
  188. ret["view"]["enum"] = "full_name";
  189. break;
  190. }
  191. switch (options.Recursion) {
  192. case ERecursionTraits::Fail:
  193. break;
  194. case ERecursionTraits::Ignore:
  195. ret["view"]["recursion"] = "ignore";
  196. break;
  197. case ERecursionTraits::Bytes:
  198. ret["view"]["recursion"] = "bytes";
  199. break;
  200. }
  201. if (options.YtMode) {
  202. ret["view"]["yt_mode"] = true;
  203. }
  204. if (options.Deterministic) {
  205. ret["view"]["deterministic"] = true;
  206. }
  207. return NJson::WriteJson(ret, false);
  208. }
  209. TProtoTypeConfig ParseTypeConfig(const TStringBuf& config) {
  210. if (config.empty()) {
  211. ythrow yexception() << "empty metadata";
  212. }
  213. switch (config[0]) {
  214. case '#': {
  215. auto plus = config.find('+');
  216. if (config[0] != '#') {
  217. ythrow yexception() << "unknown version of metadata format";
  218. }
  219. if (plus == TStringBuf::npos) {
  220. ythrow yexception() << "invalid metadata";
  221. }
  222. TProtoTypeConfig result;
  223. result.MessageName = TStringBuf(config.begin() + 1, plus - 1);
  224. result.Metadata = TStringBuf(config.begin() + 1 + plus, config.size() - plus - 1);
  225. result.SkipBytes = 0;
  226. return result;
  227. }
  228. case '{': {
  229. NJson::TJsonValue value;
  230. if (NJson::ReadJsonFastTree(config, &value)) {
  231. TProtoTypeConfig result;
  232. TString protoFormat = value["format"].GetStringSafe("protobin");
  233. TString enumFormat = value["view"]["enum"].GetStringSafe("number");
  234. TString recursion = value["view"]["recursion"].GetStringSafe("fail");
  235. result.MessageName = value["name"].GetString();
  236. result.Metadata = value["meta"].GetString();
  237. result.SkipBytes = value["skip"].GetIntegerSafe(0);
  238. result.OptionalLists = value["lists"]["optional"].GetBooleanSafe(true);
  239. result.SyntaxAware = value["syntax"]["aware"].GetBooleanSafe(false);
  240. result.YtMode = value["view"]["yt_mode"].GetBooleanSafe(false);
  241. result.Deterministic = value["view"]["deterministic"].GetBooleanSafe(false);
  242. if (protoFormat == "protobin") {
  243. result.ProtoFormat = PF_PROTOBIN;
  244. } else if (protoFormat == "prototext") {
  245. result.ProtoFormat = PF_PROTOTEXT;
  246. } else if (protoFormat == "json") {
  247. result.ProtoFormat = PF_JSON;
  248. } else {
  249. ythrow yexception() << "unsupported format " << protoFormat;
  250. }
  251. if (enumFormat == "number") {
  252. result.EnumFormat = EEnumFormat::Number;
  253. } else if (enumFormat == "name") {
  254. result.EnumFormat = EEnumFormat::Name;
  255. } else if (enumFormat == "full_name") {
  256. result.EnumFormat = EEnumFormat::FullName;
  257. } else {
  258. ythrow yexception() << "unsupported enum representation "
  259. << enumFormat;
  260. }
  261. if (recursion == "fail") {
  262. result.Recursion = ERecursionTraits::Fail;
  263. } else if (recursion == "ignore") {
  264. result.Recursion = ERecursionTraits::Ignore;
  265. } else if (recursion == "bytes") {
  266. result.Recursion = ERecursionTraits::Bytes;
  267. } else {
  268. ythrow yexception() << "unsupported recursion trait "
  269. << recursion;
  270. }
  271. return result;
  272. } else {
  273. ythrow yexception() << "can't parse json metadata";
  274. }
  275. }
  276. default:
  277. ythrow yexception() << "invalid control char "
  278. << TStringBuf(config.data(), 1);
  279. }
  280. }