schema_from_proto.cpp 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. #include "schema_from_proto.h"
  2. #include <yt/yt_proto/yt/formats/extension.pb.h>
  3. #include <util/generic/algorithm.h>
  4. #include <util/generic/string.h>
  5. #include <util/string/printf.h>
  6. #include <util/string/vector.h>
  7. namespace pb = google::protobuf;
  8. namespace NYql {
  9. namespace NPureCalc {
  10. TProtoSchemaOptions::TProtoSchemaOptions()
  11. : EnumPolicy(EEnumPolicy::Int32)
  12. , ListIsOptional(false)
  13. {
  14. }
  15. TProtoSchemaOptions& TProtoSchemaOptions::SetEnumPolicy(EEnumPolicy policy) {
  16. EnumPolicy = policy;
  17. return *this;
  18. }
  19. TProtoSchemaOptions& TProtoSchemaOptions::SetListIsOptional(bool value) {
  20. ListIsOptional = value;
  21. return *this;
  22. }
  23. TProtoSchemaOptions& TProtoSchemaOptions::SetFieldRenames(
  24. THashMap<TString, TString> fieldRenames
  25. ) {
  26. FieldRenames = std::move(fieldRenames);
  27. return *this;
  28. }
  29. namespace {
  30. EEnumFormatType EnumFormatTypeWithYTFlag(const pb::FieldDescriptor& enumField, EEnumFormatType defaultEnumFormatType) {
  31. auto flags = enumField.options().GetRepeatedExtension(NYT::flags);
  32. for (auto flag : flags) {
  33. if (flag == NYT::EWrapperFieldFlag::ENUM_INT) {
  34. return EEnumFormatType::Int32;
  35. } else if (flag == NYT::EWrapperFieldFlag::ENUM_STRING) {
  36. return EEnumFormatType::String;
  37. }
  38. }
  39. return defaultEnumFormatType;
  40. }
  41. }
  42. EEnumFormatType EnumFormatType(const pb::FieldDescriptor& enumField, EEnumPolicy enumPolicy) {
  43. switch (enumPolicy) {
  44. case EEnumPolicy::Int32:
  45. return EEnumFormatType::Int32;
  46. case EEnumPolicy::String:
  47. return EEnumFormatType::String;
  48. case EEnumPolicy::YTFlagDefaultInt32:
  49. return EnumFormatTypeWithYTFlag(enumField, EEnumFormatType::Int32);
  50. case EEnumPolicy::YTFlagDefaultString:
  51. return EnumFormatTypeWithYTFlag(enumField, EEnumFormatType::String);
  52. }
  53. }
  54. namespace {
  55. const char* FormatTypeName(const pb::FieldDescriptor* field, EEnumPolicy enumPolicy) {
  56. switch (field->type()) {
  57. case pb::FieldDescriptor::TYPE_DOUBLE:
  58. return "Double";
  59. case pb::FieldDescriptor::TYPE_FLOAT:
  60. return "Float";
  61. case pb::FieldDescriptor::TYPE_INT64:
  62. case pb::FieldDescriptor::TYPE_SFIXED64:
  63. case pb::FieldDescriptor::TYPE_SINT64:
  64. return "Int64";
  65. case pb::FieldDescriptor::TYPE_UINT64:
  66. case pb::FieldDescriptor::TYPE_FIXED64:
  67. return "Uint64";
  68. case pb::FieldDescriptor::TYPE_INT32:
  69. case pb::FieldDescriptor::TYPE_SFIXED32:
  70. case pb::FieldDescriptor::TYPE_SINT32:
  71. return "Int32";
  72. case pb::FieldDescriptor::TYPE_UINT32:
  73. case pb::FieldDescriptor::TYPE_FIXED32:
  74. return "Uint32";
  75. case pb::FieldDescriptor::TYPE_BOOL:
  76. return "Bool";
  77. case pb::FieldDescriptor::TYPE_STRING:
  78. return "Utf8";
  79. case pb::FieldDescriptor::TYPE_BYTES:
  80. return "String";
  81. case pb::FieldDescriptor::TYPE_ENUM:
  82. switch (EnumFormatType(*field, enumPolicy)) {
  83. case EEnumFormatType::Int32:
  84. return "Int32";
  85. case EEnumFormatType::String:
  86. return "String";
  87. }
  88. default:
  89. ythrow yexception() << "Unsupported protobuf type: " << field->type_name()
  90. << ", field: " << field->name() << ", " << int(field->type());
  91. }
  92. }
  93. }
  94. NYT::TNode MakeSchemaFromProto(const pb::Descriptor& descriptor, TVector<const pb::Descriptor*>& nested, const TProtoSchemaOptions& options) {
  95. if (Find(nested, &descriptor) != nested.end()) {
  96. TVector<TString> nestedNames;
  97. for (const auto* d : nested) {
  98. nestedNames.push_back(d->full_name());
  99. }
  100. nestedNames.push_back(descriptor.full_name());
  101. ythrow yexception() << Sprintf("recursive messages are not supported (%s)",
  102. JoinStrings(nestedNames, "->").c_str());
  103. }
  104. nested.push_back(&descriptor);
  105. auto items = NYT::TNode::CreateList();
  106. for (int fieldNo = 0; fieldNo < descriptor.field_count(); ++fieldNo) {
  107. const auto& fieldDescriptor = *descriptor.field(fieldNo);
  108. auto name = fieldDescriptor.name();
  109. if (
  110. auto renamePtr = options.FieldRenames.FindPtr(name);
  111. nested.size() == 1 && renamePtr
  112. ) {
  113. name = *renamePtr;
  114. }
  115. NYT::TNode itemType;
  116. if (fieldDescriptor.type() == pb::FieldDescriptor::TYPE_MESSAGE) {
  117. itemType = MakeSchemaFromProto(*fieldDescriptor.message_type(), nested, options);
  118. } else {
  119. itemType = NYT::TNode::CreateList();
  120. itemType.Add("DataType");
  121. itemType.Add(FormatTypeName(&fieldDescriptor, options.EnumPolicy));
  122. }
  123. switch (fieldDescriptor.label()) {
  124. case pb::FieldDescriptor::LABEL_OPTIONAL:
  125. {
  126. auto optionalType = NYT::TNode::CreateList();
  127. optionalType.Add("OptionalType");
  128. optionalType.Add(std::move(itemType));
  129. itemType = std::move(optionalType);
  130. }
  131. break;
  132. case pb::FieldDescriptor::LABEL_REQUIRED:
  133. break;
  134. case pb::FieldDescriptor::LABEL_REPEATED:
  135. {
  136. auto listType = NYT::TNode::CreateList();
  137. listType.Add("ListType");
  138. listType.Add(std::move(itemType));
  139. itemType = std::move(listType);
  140. if (options.ListIsOptional) {
  141. itemType = NYT::TNode::CreateList().Add("OptionalType").Add(std::move(itemType));
  142. }
  143. }
  144. break;
  145. default:
  146. ythrow yexception() << "Unknown protobuf label: " << (ui32)fieldDescriptor.label() << ", field: " << name;
  147. }
  148. auto itemNode = NYT::TNode::CreateList();
  149. itemNode.Add(name);
  150. itemNode.Add(std::move(itemType));
  151. items.Add(std::move(itemNode));
  152. }
  153. auto root = NYT::TNode::CreateList();
  154. root.Add("StructType");
  155. root.Add(std::move(items));
  156. nested.pop_back();
  157. return root;
  158. }
  159. NYT::TNode MakeSchemaFromProto(const pb::Descriptor& descriptor, const TProtoSchemaOptions& options) {
  160. TVector<const pb::Descriptor*> nested;
  161. return MakeSchemaFromProto(descriptor, nested, options);
  162. }
  163. NYT::TNode MakeVariantSchemaFromProtos(const TVector<const pb::Descriptor*>& descriptors, const TProtoSchemaOptions& options) {
  164. Y_ENSURE(options.FieldRenames.empty(), "Renames are not supported in variant mode");
  165. auto tupleItems = NYT::TNode::CreateList();
  166. for (auto descriptor : descriptors) {
  167. tupleItems.Add(MakeSchemaFromProto(*descriptor, options));
  168. }
  169. auto tupleType = NYT::TNode::CreateList();
  170. tupleType.Add("TupleType");
  171. tupleType.Add(std::move(tupleItems));
  172. auto variantType = NYT::TNode::CreateList();
  173. variantType.Add("VariantType");
  174. variantType.Add(std::move(tupleType));
  175. return variantType;
  176. }
  177. }
  178. }