descriptor.cpp 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. #include "descriptor.h"
  2. #include <library/cpp/json/json_reader.h>
  3. #include <library/cpp/json/json_writer.h>
  4. #include <library/cpp/protobuf/dynamic_prototype/dynamic_prototype.h>
  5. #include <library/cpp/protobuf/dynamic_prototype/generate_file_descriptor_set.h>
  6. #include <library/cpp/protobuf/json/json2proto.h>
  7. #include <library/cpp/protobuf/json/proto2json.h>
  8. #include <library/cpp/string_utils/base64/base64.h>
  9. #include <util/generic/hash.h>
  10. #include <util/generic/queue.h>
  11. #include <util/generic/set.h>
  12. #include <util/generic/vector.h>
  13. #include <util/stream/mem.h>
  14. #include <util/stream/str.h>
  15. #include <util/stream/zlib.h>
  16. #include <util/string/cast.h>
  17. #include <google/protobuf/text_format.h>
  18. #include <google/protobuf/io/zero_copy_stream_impl_lite.h>
  19. using namespace NProtoBuf;
  20. static TString SerializeFileDescriptorSet(const FileDescriptorSet& proto) {
  21. const auto size = proto.ByteSize();
  22. TTempBuf data(size);
  23. proto.SerializeWithCachedSizesToArray((ui8*)data.Data());
  24. TStringStream str;
  25. {
  26. TZLibCompress comp(&str, ZLib::GZip);
  27. comp.Write(data.Data(), size);
  28. }
  29. return str.Str();
  30. }
  31. static bool ParseFileDescriptorSet(const TStringBuf& data, FileDescriptorSet* proto) {
  32. TMemoryInput input(data.data(), data.size());
  33. TString buf = TZLibDecompress(&input).ReadAll();
  34. if (!proto->ParseFromArray(buf.data(), buf.size())) {
  35. return false;
  36. }
  37. return true;
  38. }
  39. TDynamicInfo::TDynamicInfo(TDynamicPrototypePtr dynamicPrototype)
  40. : DynamicPrototype(dynamicPrototype)
  41. , SkipBytes_(0)
  42. {
  43. }
  44. TDynamicInfo::~TDynamicInfo() {
  45. }
  46. TDynamicInfoRef TDynamicInfo::Create(const TStringBuf& typeConfig) {
  47. auto data = ParseTypeConfig(typeConfig);
  48. const TString& meta = Base64Decode(data.Metadata);
  49. const TString& name = data.MessageName;
  50. FileDescriptorSet set;
  51. if (!ParseFileDescriptorSet(meta, &set)) {
  52. ythrow yexception() << "can't parse metadata";
  53. }
  54. auto info = MakeIntrusive<TDynamicInfo>(TDynamicPrototype::Create(set, name, true));
  55. info->EnumFormat_ = data.EnumFormat;
  56. info->ProtoFormat_ = data.ProtoFormat;
  57. info->Recursion_ = data.Recursion;
  58. info->YtMode_ = data.YtMode;
  59. info->SkipBytes_ = data.SkipBytes;
  60. info->OptionalLists_ = data.OptionalLists;
  61. info->SyntaxAware_ = data.SyntaxAware;
  62. return info;
  63. }
  64. const Descriptor* TDynamicInfo::Descriptor() const {
  65. return DynamicPrototype->GetDescriptor();
  66. }
  67. EEnumFormat TDynamicInfo::GetEnumFormat() const {
  68. return EnumFormat_;
  69. }
  70. ERecursionTraits TDynamicInfo::GetRecursionTraits() const {
  71. return Recursion_;
  72. }
  73. bool TDynamicInfo::GetYtMode() const {
  74. return YtMode_;
  75. }
  76. bool TDynamicInfo::GetOptionalLists() const {
  77. return OptionalLists_;
  78. }
  79. bool TDynamicInfo::GetSyntaxAware() const {
  80. return SyntaxAware_;
  81. }
  82. TAutoPtr<Message> TDynamicInfo::MakeProto() {
  83. return DynamicPrototype->CreateUnsafe();
  84. }
  85. TAutoPtr<Message> TDynamicInfo::Parse(const TStringBuf& data) {
  86. auto mut = MakeProto();
  87. TStringBuf tmp(data);
  88. if (SkipBytes_) {
  89. tmp = TStringBuf(tmp.data() + SkipBytes_, tmp.size() - SkipBytes_);
  90. }
  91. switch (ProtoFormat_) {
  92. case PF_PROTOBIN: {
  93. if (!mut->ParseFromArray(tmp.data(), tmp.size())) {
  94. ythrow yexception() << "can't parse protobin message";
  95. }
  96. break;
  97. }
  98. case PF_PROTOTEXT: {
  99. io::ArrayInputStream si(tmp.data(), tmp.size());
  100. if (!TextFormat::Parse(&si, mut.Get())) {
  101. ythrow yexception() << "can't parse prototext message";
  102. }
  103. break;
  104. }
  105. case PF_JSON: {
  106. NJson::TJsonValue value;
  107. if (NJson::ReadJsonFastTree(tmp, &value)) {
  108. NProtobufJson::Json2Proto(value, *mut);
  109. } else {
  110. ythrow yexception() << "can't parse json value";
  111. }
  112. break;
  113. }
  114. }
  115. return mut;
  116. }
  117. TString TDynamicInfo::Serialize(const Message& proto) {
  118. TString result;
  119. switch (ProtoFormat_) {
  120. case PF_PROTOBIN: {
  121. result.ReserveAndResize(proto.ByteSize());
  122. if (!proto.SerializeToArray(result.begin(), result.size())) {
  123. ythrow yexception() << "can't serialize protobin message";
  124. }
  125. break;
  126. }
  127. case PF_PROTOTEXT: {
  128. if (!TextFormat::PrintToString(proto, &result)) {
  129. ythrow yexception() << "can't serialize prototext message";
  130. }
  131. break;
  132. }
  133. case PF_JSON: {
  134. NJson::TJsonValue value;
  135. NProtobufJson::Proto2Json(proto, value);
  136. result = NJson::WriteJson(value);
  137. break;
  138. }
  139. }
  140. return result;
  141. }
  142. TString GenerateProtobufTypeConfig(
  143. const Descriptor* descriptor,
  144. const TProtoTypeConfigOptions& options) {
  145. NJson::TJsonValue ret(NJson::JSON_MAP);
  146. ret["name"] = descriptor->full_name();
  147. ret["meta"] = Base64Encode(
  148. SerializeFileDescriptorSet(GenerateFileDescriptorSet(descriptor)));
  149. if (options.SkipBytes > 0) {
  150. ret["skip"] = options.SkipBytes;
  151. }
  152. switch (options.ProtoFormat) {
  153. case PF_PROTOBIN:
  154. break;
  155. case PF_PROTOTEXT:
  156. ret["format"] = "prototext";
  157. break;
  158. case PF_JSON:
  159. ret["format"] = "json";
  160. break;
  161. }
  162. if (!options.OptionalLists) {
  163. ret["lists"]["optional"] = false;
  164. }
  165. if (options.SyntaxAware) {
  166. ret["syntax"]["aware"] = options.SyntaxAware;
  167. }
  168. switch (options.EnumFormat) {
  169. case EEnumFormat::Number:
  170. break;
  171. case EEnumFormat::Name:
  172. ret["view"]["enum"] = "name";
  173. break;
  174. case EEnumFormat::FullName:
  175. ret["view"]["enum"] = "full_name";
  176. break;
  177. }
  178. switch (options.Recursion) {
  179. case ERecursionTraits::Fail:
  180. break;
  181. case ERecursionTraits::Ignore:
  182. ret["view"]["recursion"] = "ignore";
  183. break;
  184. case ERecursionTraits::Bytes:
  185. ret["view"]["recursion"] = "bytes";
  186. break;
  187. }
  188. if (options.YtMode) {
  189. ret["view"]["yt_mode"] = true;
  190. }
  191. return NJson::WriteJson(ret, false);
  192. }
  193. TProtoTypeConfig ParseTypeConfig(const TStringBuf& config) {
  194. if (config.empty()) {
  195. ythrow yexception() << "empty metadata";
  196. }
  197. switch (config[0]) {
  198. case '#': {
  199. auto plus = config.find('+');
  200. if (config[0] != '#') {
  201. ythrow yexception() << "unknown version of metadata format";
  202. }
  203. if (plus == TStringBuf::npos) {
  204. ythrow yexception() << "invalid metadata";
  205. }
  206. TProtoTypeConfig result;
  207. result.MessageName = TStringBuf(config.begin() + 1, plus - 1);
  208. result.Metadata = TStringBuf(config.begin() + 1 + plus, config.size() - plus - 1);
  209. result.SkipBytes = 0;
  210. return result;
  211. }
  212. case '{': {
  213. NJson::TJsonValue value;
  214. if (NJson::ReadJsonFastTree(config, &value)) {
  215. TProtoTypeConfig result;
  216. TString protoFormat = value["format"].GetStringSafe("protobin");
  217. TString enumFormat = value["view"]["enum"].GetStringSafe("number");
  218. TString recursion = value["view"]["recursion"].GetStringSafe("fail");
  219. result.MessageName = value["name"].GetString();
  220. result.Metadata = value["meta"].GetString();
  221. result.SkipBytes = value["skip"].GetIntegerSafe(0);
  222. result.OptionalLists = value["lists"]["optional"].GetBooleanSafe(true);
  223. result.SyntaxAware = value["syntax"]["aware"].GetBooleanSafe(false);
  224. result.YtMode = value["view"]["yt_mode"].GetBooleanSafe(false);
  225. if (protoFormat == "protobin") {
  226. result.ProtoFormat = PF_PROTOBIN;
  227. } else if (protoFormat == "prototext") {
  228. result.ProtoFormat = PF_PROTOTEXT;
  229. } else if (protoFormat == "json") {
  230. result.ProtoFormat = PF_JSON;
  231. } else {
  232. ythrow yexception() << "unsupported format " << protoFormat;
  233. }
  234. if (enumFormat == "number") {
  235. result.EnumFormat = EEnumFormat::Number;
  236. } else if (enumFormat == "name") {
  237. result.EnumFormat = EEnumFormat::Name;
  238. } else if (enumFormat == "full_name") {
  239. result.EnumFormat = EEnumFormat::FullName;
  240. } else {
  241. ythrow yexception() << "unsupported enum representation "
  242. << enumFormat;
  243. }
  244. if (recursion == "fail") {
  245. result.Recursion = ERecursionTraits::Fail;
  246. } else if (recursion == "ignore") {
  247. result.Recursion = ERecursionTraits::Ignore;
  248. } else if (recursion == "bytes") {
  249. result.Recursion = ERecursionTraits::Bytes;
  250. } else {
  251. ythrow yexception() << "unsupported recursion trait "
  252. << recursion;
  253. }
  254. return result;
  255. } else {
  256. ythrow yexception() << "can't parse json metadata";
  257. }
  258. }
  259. default:
  260. ythrow yexception() << "invalid control char "
  261. << TStringBuf(config.data(), 1);
  262. }
  263. }