123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500 |
- #include "protobuf_format.h"
- #include "errors.h"
- #include <yt/yt/core/misc/protobuf_helpers.h>
- #include <yt/yt_proto/yt/formats/extension.pb.h>
- #include <google/protobuf/text_format.h>
- #include <library/cpp/yson/node/node_io.h>
- #include <util/generic/hash_set.h>
- #include <util/generic/stack.h>
- #include <util/generic/overloaded.h>
- #include <util/stream/output.h>
- #include <util/stream/file.h>
- namespace NYT::NDetail {
- using ::google::protobuf::Descriptor;
- using ::google::protobuf::DescriptorProto;
- using ::google::protobuf::EnumDescriptor;
- using ::google::protobuf::EnumDescriptorProto;
- using ::google::protobuf::FieldDescriptor;
- using ::google::protobuf::FieldDescriptorProto;
- using ::google::protobuf::OneofDescriptor;
- using ::google::protobuf::Message;
- using ::google::protobuf::FileDescriptor;
- using ::google::protobuf::FileDescriptorProto;
- using ::google::protobuf::FileDescriptorSet;
- using ::google::protobuf::FieldOptions;
- using ::google::protobuf::FileOptions;
- using ::google::protobuf::OneofOptions;
- using ::google::protobuf::MessageOptions;
- using ::ToString;
- namespace {
- ////////////////////////////////////////////////////////////////////////////////
- using TOneofOption = std::variant<
- EProtobufOneofMode>;
- using TFieldOption = std::variant<
- EProtobufType,
- EProtobufSerializationMode,
- EProtobufListMode,
- EProtobufMapMode,
- EProtobufEnumWritingMode>;
- using TMessageOption = std::variant<
- EProtobufFieldSortOrder>;
- struct TOtherColumns
- { };
- using TValueTypeOrOtherColumns = std::variant<EValueType, TOtherColumns>;
- ////////////////////////////////////////////////////////////////////////////////
- TFieldOption FieldFlagToOption(EWrapperFieldFlag::Enum flag)
- {
- using EFlag = EWrapperFieldFlag;
- switch (flag) {
- case EFlag::SERIALIZATION_PROTOBUF:
- return EProtobufSerializationMode::Protobuf;
- case EFlag::SERIALIZATION_YT:
- return EProtobufSerializationMode::Yt;
- case EFlag::ANY:
- return EProtobufType::Any;
- case EFlag::OTHER_COLUMNS:
- return EProtobufType::OtherColumns;
- case EFlag::ENUM_INT:
- return EProtobufType::EnumInt;
- case EFlag::ENUM_STRING:
- return EProtobufType::EnumString;
- case EFlag::OPTIONAL_LIST:
- return EProtobufListMode::Optional;
- case EFlag::REQUIRED_LIST:
- return EProtobufListMode::Required;
- case EFlag::MAP_AS_LIST_OF_STRUCTS_LEGACY:
- return EProtobufMapMode::ListOfStructsLegacy;
- case EFlag::MAP_AS_LIST_OF_STRUCTS:
- return EProtobufMapMode::ListOfStructs;
- case EFlag::MAP_AS_DICT:
- return EProtobufMapMode::Dict;
- case EFlag::MAP_AS_OPTIONAL_DICT:
- return EProtobufMapMode::OptionalDict;
- case EFlag::EMBEDDED:
- return EProtobufSerializationMode::Embedded;
- case EFlag::ENUM_SKIP_UNKNOWN_VALUES:
- return EProtobufEnumWritingMode::SkipUnknownValues;
- case EFlag::ENUM_CHECK_VALUES:
- return EProtobufEnumWritingMode::CheckValues;
- }
- Y_ABORT();
- }
- TMessageOption MessageFlagToOption(EWrapperMessageFlag::Enum flag)
- {
- using EFlag = EWrapperMessageFlag;
- switch (flag) {
- case EFlag::DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE:
- return EProtobufFieldSortOrder::AsInProtoFile;
- case EFlag::SORT_FIELDS_BY_FIELD_NUMBER:
- return EProtobufFieldSortOrder::ByFieldNumber;
- }
- Y_ABORT();
- }
- TOneofOption OneofFlagToOption(EWrapperOneofFlag::Enum flag)
- {
- using EFlag = EWrapperOneofFlag;
- switch (flag) {
- case EFlag::SEPARATE_FIELDS:
- return EProtobufOneofMode::SeparateFields;
- case EFlag::VARIANT:
- return EProtobufOneofMode::Variant;
- }
- Y_ABORT();
- }
- EWrapperFieldFlag::Enum OptionToFieldFlag(TFieldOption option)
- {
- using EFlag = EWrapperFieldFlag;
- struct TVisitor
- {
- EFlag::Enum operator() (EProtobufType type)
- {
- switch (type) {
- case EProtobufType::Any:
- return EFlag::ANY;
- case EProtobufType::OtherColumns:
- return EFlag::OTHER_COLUMNS;
- case EProtobufType::EnumInt:
- return EFlag::ENUM_INT;
- case EProtobufType::EnumString:
- return EFlag::ENUM_STRING;
- }
- Y_ABORT();
- }
- EFlag::Enum operator() (EProtobufSerializationMode serializationMode)
- {
- switch (serializationMode) {
- case EProtobufSerializationMode::Yt:
- return EFlag::SERIALIZATION_YT;
- case EProtobufSerializationMode::Protobuf:
- return EFlag::SERIALIZATION_PROTOBUF;
- case EProtobufSerializationMode::Embedded:
- return EFlag::EMBEDDED;
- }
- Y_ABORT();
- }
- EFlag::Enum operator() (EProtobufListMode listMode)
- {
- switch (listMode) {
- case EProtobufListMode::Optional:
- return EFlag::OPTIONAL_LIST;
- case EProtobufListMode::Required:
- return EFlag::REQUIRED_LIST;
- }
- Y_ABORT();
- }
- EFlag::Enum operator() (EProtobufMapMode mapMode)
- {
- switch (mapMode) {
- case EProtobufMapMode::ListOfStructsLegacy:
- return EFlag::MAP_AS_LIST_OF_STRUCTS_LEGACY;
- case EProtobufMapMode::ListOfStructs:
- return EFlag::MAP_AS_LIST_OF_STRUCTS;
- case EProtobufMapMode::Dict:
- return EFlag::MAP_AS_DICT;
- case EProtobufMapMode::OptionalDict:
- return EFlag::MAP_AS_OPTIONAL_DICT;
- }
- Y_ABORT();
- }
- EFlag::Enum operator() (EProtobufEnumWritingMode enumWritingMode)
- {
- switch (enumWritingMode) {
- case EProtobufEnumWritingMode::SkipUnknownValues:
- return EFlag::ENUM_SKIP_UNKNOWN_VALUES;
- case EProtobufEnumWritingMode::CheckValues:
- return EFlag::ENUM_CHECK_VALUES;
- }
- Y_ABORT();
- }
- };
- return std::visit(TVisitor(), option);
- }
- EWrapperMessageFlag::Enum OptionToMessageFlag(TMessageOption option)
- {
- using EFlag = EWrapperMessageFlag;
- struct TVisitor
- {
- EFlag::Enum operator() (EProtobufFieldSortOrder sortOrder)
- {
- switch (sortOrder) {
- case EProtobufFieldSortOrder::AsInProtoFile:
- return EFlag::DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE;
- case EProtobufFieldSortOrder::ByFieldNumber:
- return EFlag::SORT_FIELDS_BY_FIELD_NUMBER;
- }
- Y_ABORT();
- }
- };
- return std::visit(TVisitor(), option);
- }
- EWrapperOneofFlag::Enum OptionToOneofFlag(TOneofOption option)
- {
- using EFlag = EWrapperOneofFlag;
- struct TVisitor
- {
- EFlag::Enum operator() (EProtobufOneofMode mode)
- {
- switch (mode) {
- case EProtobufOneofMode::SeparateFields:
- return EFlag::SEPARATE_FIELDS;
- case EProtobufOneofMode::Variant:
- return EFlag::VARIANT;
- }
- Y_ABORT();
- }
- };
- return std::visit(TVisitor(), option);
- }
- template <typename T, typename TOptionToFlag>
- void SetOption(TMaybe<T>& option, T newOption, TOptionToFlag optionToFlag)
- {
- if (option) {
- if (*option == newOption) {
- ythrow yexception() << "Duplicate protobuf flag " << optionToFlag(newOption);
- } else {
- ythrow yexception() << "Incompatible protobuf flags " <<
- optionToFlag(*option) << " and " << optionToFlag(newOption);
- }
- }
- option = newOption;
- }
- class TParseProtobufFieldOptionsVisitor
- {
- public:
- void operator() (EProtobufType type)
- {
- SetOption(Type, type);
- }
- void operator() (EProtobufSerializationMode serializationMode)
- {
- SetOption(SerializationMode, serializationMode);
- }
- void operator() (EProtobufListMode listMode)
- {
- SetOption(ListMode, listMode);
- }
- void operator() (EProtobufMapMode mapMode)
- {
- SetOption(MapMode, mapMode);
- }
- void operator() (EProtobufEnumWritingMode enumWritingMode)
- {
- SetOption(EnumWritingMode, enumWritingMode);
- }
- template <typename T>
- void SetOption(TMaybe<T>& option, T newOption)
- {
- NYT::NDetail::SetOption(option, newOption, OptionToFieldFlag);
- }
- public:
- TMaybe<EProtobufType> Type;
- TMaybe<EProtobufSerializationMode> SerializationMode;
- TMaybe<EProtobufListMode> ListMode;
- TMaybe<EProtobufMapMode> MapMode;
- TMaybe<EProtobufEnumWritingMode> EnumWritingMode;
- };
- class TParseProtobufMessageOptionsVisitor
- {
- public:
- void operator() (EProtobufFieldSortOrder fieldSortOrder)
- {
- SetOption(FieldSortOrder, fieldSortOrder);
- }
- template <typename T>
- void SetOption(TMaybe<T>& option, T newOption)
- {
- NYT::NDetail::SetOption(option, newOption, OptionToMessageFlag);
- }
- public:
- TMaybe<EProtobufFieldSortOrder> FieldSortOrder;
- };
- class TParseProtobufOneofOptionsVisitor
- {
- public:
- void operator() (EProtobufOneofMode mode)
- {
- SetOption(Mode, mode);
- }
- template <typename T>
- void SetOption(TMaybe<T>& option, T newOption)
- {
- NYT::NDetail::SetOption(option, newOption, OptionToOneofFlag);
- }
- public:
- TMaybe<EProtobufOneofMode> Mode;
- };
- void ParseProtobufFieldOptions(
- const ::google::protobuf::RepeatedField<EWrapperFieldFlag::Enum>& flags,
- TProtobufFieldOptions* fieldOptions)
- {
- TParseProtobufFieldOptionsVisitor visitor;
- for (auto flag : flags) {
- std::visit(visitor, FieldFlagToOption(flag));
- }
- if (visitor.Type) {
- fieldOptions->Type = *visitor.Type;
- }
- if (visitor.SerializationMode) {
- fieldOptions->SerializationMode = *visitor.SerializationMode;
- }
- if (visitor.ListMode) {
- fieldOptions->ListMode = *visitor.ListMode;
- }
- if (visitor.MapMode) {
- fieldOptions->MapMode = *visitor.MapMode;
- }
- }
- void ParseProtobufMessageOptions(
- const ::google::protobuf::RepeatedField<EWrapperMessageFlag::Enum>& flags,
- TProtobufMessageOptions* messageOptions)
- {
- TParseProtobufMessageOptionsVisitor visitor;
- for (auto flag : flags) {
- std::visit(visitor, MessageFlagToOption(flag));
- }
- if (visitor.FieldSortOrder) {
- messageOptions->FieldSortOrder = *visitor.FieldSortOrder;
- }
- }
- void ParseProtobufOneofOptions(
- const ::google::protobuf::RepeatedField<EWrapperOneofFlag::Enum>& flags,
- TProtobufOneofOptions* messageOptions)
- {
- TParseProtobufOneofOptionsVisitor visitor;
- for (auto flag : flags) {
- std::visit(visitor, OneofFlagToOption(flag));
- }
- if (visitor.Mode) {
- messageOptions->Mode = *visitor.Mode;
- }
- }
- TProtobufFieldOptions GetDefaultFieldOptions(
- const Descriptor* descriptor,
- TProtobufFieldOptions defaultFieldOptions = {})
- {
- ParseProtobufFieldOptions(
- descriptor->file()->options().GetRepeatedExtension(file_default_field_flags),
- &defaultFieldOptions);
- ParseProtobufFieldOptions(
- descriptor->options().GetRepeatedExtension(default_field_flags),
- &defaultFieldOptions);
- return defaultFieldOptions;
- }
- TProtobufOneofOptions GetDefaultOneofOptions(const Descriptor* descriptor)
- {
- TProtobufOneofOptions defaultOneofOptions;
- ParseProtobufOneofOptions(
- descriptor->file()->options().GetRepeatedExtension(file_default_oneof_flags),
- &defaultOneofOptions);
- ParseProtobufOneofOptions(
- descriptor->options().GetRepeatedExtension(default_oneof_flags),
- &defaultOneofOptions);
- switch (defaultOneofOptions.Mode) {
- case EProtobufOneofMode::Variant: {
- auto defaultFieldOptions = GetDefaultFieldOptions(descriptor);
- switch (defaultFieldOptions.SerializationMode) {
- case EProtobufSerializationMode::Protobuf:
- // For Protobuf serialization mode default is SeparateFields.
- defaultOneofOptions.Mode = EProtobufOneofMode::SeparateFields;
- return defaultOneofOptions;
- case EProtobufSerializationMode::Yt:
- case EProtobufSerializationMode::Embedded:
- return defaultOneofOptions;
- }
- Y_ABORT();
- }
- case EProtobufOneofMode::SeparateFields:
- return defaultOneofOptions;
- }
- Y_ABORT();
- }
- ////////////////////////////////////////////////////////////////////////////////
- void ValidateProtobufType(const FieldDescriptor& fieldDescriptor, EProtobufType protobufType)
- {
- const auto fieldType = fieldDescriptor.type();
- auto ensureType = [&] (FieldDescriptor::Type expectedType) {
- Y_ENSURE(fieldType == expectedType,
- "Type of field " << fieldDescriptor.name() << "does not match specified field flag " <<
- OptionToFieldFlag(protobufType) << ": "
- "expected " << FieldDescriptor::TypeName(expectedType) << ", " <<
- "got " << FieldDescriptor::TypeName(fieldType));
- };
- switch (protobufType) {
- case EProtobufType::Any:
- ensureType(FieldDescriptor::TYPE_BYTES);
- return;
- case EProtobufType::OtherColumns:
- ensureType(FieldDescriptor::TYPE_BYTES);
- return;
- case EProtobufType::EnumInt:
- ensureType(FieldDescriptor::TYPE_ENUM);
- return;
- case EProtobufType::EnumString:
- ensureType(FieldDescriptor::TYPE_ENUM);
- return;
- }
- Y_ABORT();
- }
- ////////////////////////////////////////////////////////////////////////////////
- class TCycleChecker
- {
- private:
- class TGuard
- {
- public:
- TGuard(TCycleChecker* checker, const Descriptor* descriptor)
- : Checker_(checker)
- , Descriptor_(descriptor)
- {
- Checker_->ActiveVertices_.insert(Descriptor_);
- Checker_->Stack_.push(Descriptor_);
- }
- ~TGuard()
- {
- Checker_->ActiveVertices_.erase(Descriptor_);
- Checker_->Stack_.pop();
- }
- private:
- TCycleChecker* Checker_;
- const Descriptor* Descriptor_;
- };
- public:
- [[nodiscard]] TGuard Enter(const Descriptor* descriptor)
- {
- if (ActiveVertices_.contains(descriptor)) {
- Y_ABORT_UNLESS(!Stack_.empty());
- ythrow TApiUsageError() << "Cyclic reference found for protobuf messages. " <<
- "Consider removing " << EWrapperFieldFlag::SERIALIZATION_YT << " flag " <<
- "somewhere on the cycle containing " <<
- Stack_.top()->full_name() << " and " << descriptor->full_name();
- }
- return TGuard(this, descriptor);
- }
- private:
- THashSet<const Descriptor*> ActiveVertices_;
- TStack<const Descriptor*> Stack_;
- };
- ////////////////////////////////////////////////////////////////////////////////
- } // namespace
- ////////////////////////////////////////////////////////////////////////////////
- TProtobufFieldOptions GetFieldOptions(
- const FieldDescriptor* fieldDescriptor,
- const TMaybe<TProtobufFieldOptions>& defaultFieldOptions)
- {
- TProtobufFieldOptions options;
- if (defaultFieldOptions) {
- options = *defaultFieldOptions;
- } else {
- options = GetDefaultFieldOptions(fieldDescriptor->containing_type());
- }
- ParseProtobufFieldOptions(fieldDescriptor->options().GetRepeatedExtension(flags), &options);
- return options;
- }
- TProtobufOneofOptions GetOneofOptions(
- const OneofDescriptor* oneofDescriptor,
- const TMaybe<TProtobufOneofOptions>& defaultOneofOptions)
- {
- TProtobufOneofOptions options;
- if (defaultOneofOptions) {
- options = *defaultOneofOptions;
- } else {
- options = GetDefaultOneofOptions(oneofDescriptor->containing_type());
- }
- ParseProtobufOneofOptions(oneofDescriptor->options().GetRepeatedExtension(oneof_flags), &options);
- if (oneofDescriptor->is_synthetic()) {
- options.Mode = EProtobufOneofMode::SeparateFields;
- }
- auto variantFieldName = oneofDescriptor->options().GetExtension(variant_field_name);
- switch (options.Mode) {
- case EProtobufOneofMode::SeparateFields:
- if (!variantFieldName.empty()) {
- ythrow TApiUsageError() << "\"variant_field_name\" requires (NYT.oneof_flags) = VARIANT";
- }
- break;
- case EProtobufOneofMode::Variant:
- if (variantFieldName.empty()) {
- options.VariantFieldName = FromProto<TString>(oneofDescriptor->name());
- } else {
- options.VariantFieldName = variantFieldName;
- }
- break;
- }
- return options;
- }
- TProtobufMessageOptions GetMessageOptions(const Descriptor* descriptor)
- {
- TProtobufMessageOptions options;
- ParseProtobufMessageOptions(
- descriptor->file()->options().GetRepeatedExtension(file_default_message_flags),
- &options);
- ParseProtobufMessageOptions(
- descriptor->options().GetRepeatedExtension(message_flags),
- &options);
- return options;
- }
- TNode MakeEnumerationConfig(const ::google::protobuf::EnumDescriptor* enumDescriptor)
- {
- auto config = TNode::CreateMap();
- for (int i = 0; i < enumDescriptor->value_count(); ++i) {
- config[enumDescriptor->value(i)->name()] = enumDescriptor->value(i)->number();
- }
- return config;
- }
- TString DeduceProtobufType(
- const FieldDescriptor* fieldDescriptor,
- const TProtobufFieldOptions& options)
- {
- if (options.Type) {
- ValidateProtobufType(*fieldDescriptor, *options.Type);
- return ToString(*options.Type);
- }
- switch (fieldDescriptor->type()) {
- case FieldDescriptor::TYPE_ENUM:
- return ToString(EProtobufType::EnumString);
- case FieldDescriptor::TYPE_MESSAGE:
- switch (options.SerializationMode) {
- case EProtobufSerializationMode::Protobuf:
- return "message";
- case EProtobufSerializationMode::Yt:
- return "structured_message";
- case EProtobufSerializationMode::Embedded:
- return "embedded_message";
- }
- Y_ABORT();
- default:
- return fieldDescriptor->type_name();
- }
- Y_ABORT();
- }
- TString GetColumnName(const ::google::protobuf::FieldDescriptor& field)
- {
- const auto& options = field.options();
- const auto columnName = FromProto<TString>(options.GetExtension(column_name));
- if (!columnName.empty()) {
- return columnName;
- }
- const auto keyColumnName = FromProto<TString>(options.GetExtension(key_column_name));
- if (!keyColumnName.empty()) {
- return keyColumnName;
- }
- return FromProto<TString>(field.name());
- }
- TNode MakeProtoFormatMessageFieldsConfig(
- const Descriptor* descriptor,
- TNode* enumerations,
- TCycleChecker& cycleChecker);
- TNode MakeProtoFormatMessageFieldsConfig(
- const Descriptor* descriptor,
- TNode* enumerations,
- const TProtobufFieldOptions& defaultFieldOptions,
- const TProtobufOneofOptions& defaultOneofOptions,
- TCycleChecker& cycleChecker);
- TNode MakeMapFieldsConfig(
- const FieldDescriptor* fieldDescriptor,
- TNode* enumerations,
- const TProtobufFieldOptions& fieldOptions,
- TCycleChecker& cycleChecker)
- {
- Y_ABORT_UNLESS(fieldDescriptor->is_map());
- auto message = fieldDescriptor->message_type();
- switch (fieldOptions.MapMode) {
- case EProtobufMapMode::ListOfStructsLegacy:
- return MakeProtoFormatMessageFieldsConfig(
- message,
- enumerations,
- cycleChecker);
- case EProtobufMapMode::ListOfStructs:
- case EProtobufMapMode::Dict:
- case EProtobufMapMode::OptionalDict: {
- TProtobufFieldOptions defaultFieldOptions;
- defaultFieldOptions.SerializationMode = EProtobufSerializationMode::Yt;
- return MakeProtoFormatMessageFieldsConfig(
- message,
- enumerations,
- defaultFieldOptions,
- TProtobufOneofOptions{},
- cycleChecker);
- }
- }
- Y_ABORT();
- }
- TNode MakeProtoFormatFieldConfig(
- const FieldDescriptor* fieldDescriptor,
- TNode* enumerations,
- const TProtobufFieldOptions& defaultOptions,
- TCycleChecker& cycleChecker)
- {
- auto fieldConfig = TNode::CreateMap();
- fieldConfig["field_number"] = fieldDescriptor->number();
- fieldConfig["name"] = GetColumnName(*fieldDescriptor);
- auto fieldOptions = GetFieldOptions(fieldDescriptor, defaultOptions);
- Y_ENSURE(fieldOptions.SerializationMode != EProtobufSerializationMode::Embedded,
- "EMBEDDED flag is currently supported only with "
- "ProtobufFormatWithDescriptors config option set to true");
- if (fieldDescriptor->is_repeated()) {
- Y_ENSURE_EX(fieldOptions.SerializationMode == EProtobufSerializationMode::Yt,
- TApiUsageError() << "Repeated field \"" << fieldDescriptor->full_name() << "\" " <<
- "must have flag \"" << EWrapperFieldFlag::SERIALIZATION_YT << "\"");
- }
- fieldConfig["repeated"] = fieldDescriptor->is_repeated();
- fieldConfig["packed"] = fieldDescriptor->is_packed();
- fieldConfig["proto_type"] = DeduceProtobufType(fieldDescriptor, fieldOptions);
- if (fieldDescriptor->type() == FieldDescriptor::TYPE_ENUM) {
- auto* enumeration = fieldDescriptor->enum_type();
- (*enumerations)[enumeration->full_name()] = MakeEnumerationConfig(enumeration);
- fieldConfig["enumeration_name"] = FromProto<TString>(enumeration->full_name());
- }
- if (fieldOptions.SerializationMode != EProtobufSerializationMode::Yt) {
- return fieldConfig;
- }
- if (fieldDescriptor->is_map()) {
- fieldConfig["fields"] = MakeMapFieldsConfig(fieldDescriptor, enumerations, fieldOptions, cycleChecker);
- return fieldConfig;
- }
- if (fieldDescriptor->type() == FieldDescriptor::TYPE_MESSAGE) {
- fieldConfig["fields"] = MakeProtoFormatMessageFieldsConfig(
- fieldDescriptor->message_type(),
- enumerations,
- cycleChecker);
- }
- return fieldConfig;
- }
- void MakeProtoFormatOneofConfig(
- const OneofDescriptor* oneofDescriptor,
- TNode* enumerations,
- const TProtobufFieldOptions& defaultFieldOptions,
- const TProtobufOneofOptions& defaultOneofOptions,
- TCycleChecker& cycleChecker,
- TNode* fields)
- {
- auto addFields = [&] (TNode* fields) {
- for (int i = 0; i < oneofDescriptor->field_count(); ++i) {
- fields->Add(MakeProtoFormatFieldConfig(
- oneofDescriptor->field(i),
- enumerations,
- defaultFieldOptions,
- cycleChecker));
- }
- };
- auto oneofOptions = GetOneofOptions(oneofDescriptor, defaultOneofOptions);
- switch (oneofOptions.Mode) {
- case EProtobufOneofMode::SeparateFields:
- addFields(fields);
- return;
- case EProtobufOneofMode::Variant: {
- auto oneofFields = TNode::CreateList();
- addFields(&oneofFields);
- auto oneofField = TNode()
- ("proto_type", "oneof")
- ("name", oneofOptions.VariantFieldName)
- ("fields", std::move(oneofFields));
- fields->Add(std::move(oneofField));
- return;
- }
- }
- Y_ABORT();
- }
- TNode MakeProtoFormatMessageFieldsConfig(
- const Descriptor* descriptor,
- TNode* enumerations,
- const TProtobufFieldOptions& defaultFieldOptions,
- const TProtobufOneofOptions& defaultOneofOptions,
- TCycleChecker& cycleChecker)
- {
- auto fields = TNode::CreateList();
- THashSet<const OneofDescriptor*> visitedOneofs;
- auto guard = cycleChecker.Enter(descriptor);
- for (int fieldIndex = 0; fieldIndex < descriptor->field_count(); ++fieldIndex) {
- auto fieldDescriptor = descriptor->field(fieldIndex);
- auto oneofDescriptor = fieldDescriptor->containing_oneof();
- if (!oneofDescriptor) {
- fields.Add(MakeProtoFormatFieldConfig(
- fieldDescriptor,
- enumerations,
- defaultFieldOptions,
- cycleChecker));
- } else if (!visitedOneofs.contains(oneofDescriptor)) {
- MakeProtoFormatOneofConfig(
- oneofDescriptor,
- enumerations,
- defaultFieldOptions,
- defaultOneofOptions,
- cycleChecker,
- &fields);
- visitedOneofs.insert(oneofDescriptor);
- }
- }
- return fields;
- }
- TNode MakeProtoFormatMessageFieldsConfig(
- const Descriptor* descriptor,
- TNode* enumerations,
- TCycleChecker& cycleChecker)
- {
- return MakeProtoFormatMessageFieldsConfig(
- descriptor,
- enumerations,
- GetDefaultFieldOptions(descriptor),
- GetDefaultOneofOptions(descriptor),
- cycleChecker);
- }
- TNode MakeProtoFormatConfigWithTables(const TVector<const Descriptor*>& descriptors)
- {
- TNode config("protobuf");
- config.Attributes()
- ("enumerations", TNode::CreateMap())
- ("tables", TNode::CreateList());
- auto& enumerations = config.Attributes()["enumerations"];
- for (auto* descriptor : descriptors) {
- TCycleChecker cycleChecker;
- auto columns = MakeProtoFormatMessageFieldsConfig(descriptor, &enumerations, cycleChecker);
- config.Attributes()["tables"].Add(
- TNode()("columns", std::move(columns)));
- }
- return config;
- }
- ////////////////////////////////////////////////////////////////////////////////
- class TFileDescriptorSetBuilder
- {
- public:
- TFileDescriptorSetBuilder()
- : ExtensionFile_(EWrapperFieldFlag::descriptor()->file())
- { }
- void AddDescriptor(const Descriptor* descriptor)
- {
- auto [it, inserted] = AllDescriptors_.insert(descriptor);
- if (!inserted) {
- return;
- }
- const auto* containingType = descriptor->containing_type();
- while (containingType) {
- AddDescriptor(containingType);
- containingType = containingType->containing_type();
- }
- for (int i = 0; i < descriptor->field_count(); ++i) {
- AddField(descriptor->field(i));
- }
- }
- FileDescriptorSet Build()
- {
- THashSet<const FileDescriptor*> visitedFiles;
- TVector<const FileDescriptor*> fileTopoOrder;
- for (const auto* descriptor : AllDescriptors_) {
- TraverseDependencies(descriptor->file(), visitedFiles, fileTopoOrder);
- }
- THashSet<TString> messageTypeNames;
- THashSet<TString> enumTypeNames;
- for (const auto* descriptor : AllDescriptors_) {
- messageTypeNames.insert(FromProto<TString>(descriptor->full_name()));
- }
- for (const auto* enumDescriptor : EnumDescriptors_) {
- enumTypeNames.insert(FromProto<TString>(enumDescriptor->full_name()));
- }
- FileDescriptorSet fileDescriptorSetProto;
- for (const auto* file : fileTopoOrder) {
- auto* fileProto = fileDescriptorSetProto.add_file();
- file->CopyTo(fileProto);
- Strip(fileProto, messageTypeNames, enumTypeNames);
- }
- return fileDescriptorSetProto;
- }
- private:
- void AddField(const FieldDescriptor* fieldDescriptor)
- {
- if (fieldDescriptor->message_type()) {
- AddDescriptor(fieldDescriptor->message_type());
- }
- if (fieldDescriptor->enum_type()) {
- AddEnumDescriptor(fieldDescriptor->enum_type());
- }
- }
- void AddEnumDescriptor(const EnumDescriptor* enumDescriptor)
- {
- auto [it, inserted] = EnumDescriptors_.insert(enumDescriptor);
- if (!inserted) {
- return;
- }
- const auto* containingType = enumDescriptor->containing_type();
- while (containingType) {
- AddDescriptor(containingType);
- containingType = containingType->containing_type();
- }
- }
- void TraverseDependencies(
- const FileDescriptor* current,
- THashSet<const FileDescriptor*>& visited,
- TVector<const FileDescriptor*>& topoOrder)
- {
- auto [it, inserted] = visited.insert(current);
- if (!inserted) {
- return;
- }
- for (int i = 0; i < current->dependency_count(); ++i) {
- TraverseDependencies(current->dependency(i), visited, topoOrder);
- }
- topoOrder.push_back(current);
- }
- template <typename TOptions>
- void StripUnknownOptions(TOptions* options)
- {
- std::vector<const FieldDescriptor*> fields;
- auto reflection = options->GetReflection();
- reflection->ListFields(*options, &fields);
- for (auto field : fields) {
- if (field->is_extension() && field->file() != ExtensionFile_) {
- reflection->ClearField(options, field);
- }
- }
- }
- template <typename TRepeatedField, typename TPredicate>
- void RemoveIf(TRepeatedField* repeatedField, TPredicate predicate)
- {
- repeatedField->erase(
- std::remove_if(repeatedField->begin(), repeatedField->end(), predicate),
- repeatedField->end());
- }
- void Strip(
- const TString& containingTypePrefix,
- DescriptorProto* messageProto,
- const THashSet<TString>& messageTypeNames,
- const THashSet<TString>& enumTypeNames)
- {
- const auto prefix = containingTypePrefix + messageProto->name() + '.';
- RemoveIf(messageProto->mutable_nested_type(), [&] (const DescriptorProto& descriptorProto) {
- return !messageTypeNames.contains(prefix + descriptorProto.name());
- });
- RemoveIf(messageProto->mutable_enum_type(), [&] (const EnumDescriptorProto& enumDescriptorProto) {
- return !enumTypeNames.contains(prefix + enumDescriptorProto.name());
- });
- messageProto->clear_extension();
- StripUnknownOptions(messageProto->mutable_options());
- for (auto& fieldProto : *messageProto->mutable_field()) {
- StripUnknownOptions(fieldProto.mutable_options());
- }
- for (auto& oneofProto : *messageProto->mutable_oneof_decl()) {
- StripUnknownOptions(oneofProto.mutable_options());
- }
- for (auto& nestedTypeProto : *messageProto->mutable_nested_type()) {
- Strip(prefix, &nestedTypeProto, messageTypeNames, enumTypeNames);
- }
- for (auto& enumProto : *messageProto->mutable_enum_type()) {
- StripUnknownOptions(enumProto.mutable_options());
- for (auto& enumValue : *enumProto.mutable_value()) {
- StripUnknownOptions(enumValue.mutable_options());
- }
- }
- }
- void Strip(
- FileDescriptorProto* fileProto,
- const THashSet<TString>& messageTypeNames,
- const THashSet<TString>& enumTypeNames)
- {
- const auto prefix = fileProto->package().empty()
- ? ""
- : FromProto<TString>(fileProto->package()) + '.';
- RemoveIf(fileProto->mutable_message_type(), [&] (const DescriptorProto& descriptorProto) {
- return !messageTypeNames.contains(prefix + descriptorProto.name());
- });
- RemoveIf(fileProto->mutable_enum_type(), [&] (const EnumDescriptorProto& enumDescriptorProto) {
- return !enumTypeNames.contains(prefix + enumDescriptorProto.name());
- });
- fileProto->clear_service();
- fileProto->clear_extension();
- StripUnknownOptions(fileProto->mutable_options());
- for (auto& messageProto : *fileProto->mutable_message_type()) {
- Strip(prefix, &messageProto, messageTypeNames, enumTypeNames);
- }
- for (auto& enumProto : *fileProto->mutable_enum_type()) {
- StripUnknownOptions(enumProto.mutable_options());
- for (auto& enumValue : *enumProto.mutable_value()) {
- StripUnknownOptions(enumValue.mutable_options());
- }
- }
- }
- private:
- const FileDescriptor* const ExtensionFile_;
- THashSet<const Descriptor*> AllDescriptors_;
- THashSet<const EnumDescriptor*> EnumDescriptors_;
- };
- TNode MakeProtoFormatConfigWithDescriptors(const TVector<const Descriptor*>& descriptors)
- {
- TFileDescriptorSetBuilder builder;
- auto typeNames = TNode::CreateList();
- for (const auto* descriptor : descriptors) {
- builder.AddDescriptor(descriptor);
- typeNames.Add(FromProto<TString>(descriptor->full_name()));
- }
- auto fileDescriptorSetText = FromProto<TString>(builder.Build().ShortDebugString());
- TNode config("protobuf");
- config.Attributes()
- ("file_descriptor_set_text", std::move(fileDescriptorSetText))
- ("type_names", std::move(typeNames));
- return config;
- }
- ////////////////////////////////////////////////////////////////////////////////
- using TTypePtrOrOtherColumns = std::variant<NTi::TTypePtr, TOtherColumns>;
- struct TMember {
- TString Name;
- TTypePtrOrOtherColumns TypeOrOtherColumns;
- };
- ////////////////////////////////////////////////////////////////////////////////
- TValueTypeOrOtherColumns GetScalarFieldType(
- const FieldDescriptor& fieldDescriptor,
- const TProtobufFieldOptions& options)
- {
- if (options.Type) {
- switch (*options.Type) {
- case EProtobufType::EnumInt:
- return EValueType::VT_INT64;
- case EProtobufType::EnumString:
- return EValueType::VT_STRING;
- case EProtobufType::Any:
- return EValueType::VT_ANY;
- case EProtobufType::OtherColumns:
- return TOtherColumns{};
- }
- Y_ABORT();
- }
- switch (fieldDescriptor.cpp_type()) {
- case FieldDescriptor::CPPTYPE_INT32:
- return EValueType::VT_INT32;
- case FieldDescriptor::CPPTYPE_INT64:
- return EValueType::VT_INT64;
- case FieldDescriptor::CPPTYPE_UINT32:
- return EValueType::VT_UINT32;
- case FieldDescriptor::CPPTYPE_UINT64:
- return EValueType::VT_UINT64;
- case FieldDescriptor::CPPTYPE_FLOAT:
- case FieldDescriptor::CPPTYPE_DOUBLE:
- return EValueType::VT_DOUBLE;
- case FieldDescriptor::CPPTYPE_BOOL:
- return EValueType::VT_BOOLEAN;
- case FieldDescriptor::CPPTYPE_STRING:
- case FieldDescriptor::CPPTYPE_MESSAGE:
- case FieldDescriptor::CPPTYPE_ENUM:
- return EValueType::VT_STRING;
- default:
- ythrow yexception() <<
- "Unexpected field type '" << fieldDescriptor.cpp_type_name() << "' " <<
- "for field " << fieldDescriptor.name();
- }
- }
- bool HasNameExtension(const FieldDescriptor& fieldDescriptor)
- {
- const auto& options = fieldDescriptor.options();
- return options.HasExtension(column_name) || options.HasExtension(key_column_name);
- }
- void SortFields(TVector<const FieldDescriptor*>& fieldDescriptors, EProtobufFieldSortOrder fieldSortOrder)
- {
- switch (fieldSortOrder) {
- case EProtobufFieldSortOrder::AsInProtoFile:
- return;
- case EProtobufFieldSortOrder::ByFieldNumber:
- SortBy(fieldDescriptors, [] (const FieldDescriptor* fieldDescriptor) {
- return fieldDescriptor->number();
- });
- return;
- }
- Y_ABORT();
- }
- NTi::TTypePtr CreateStruct(TStringBuf fieldName, TVector<TMember> members)
- {
- TVector<NTi::TStructType::TOwnedMember> structMembers;
- structMembers.reserve(members.size());
- for (auto& member : members) {
- std::visit(TOverloaded{
- [&] (TOtherColumns) {
- ythrow TApiUsageError() <<
- "Could not deduce YT type for field " << member.Name << " of " <<
- "embedded message field " << fieldName << " " <<
- "(note that " << EWrapperFieldFlag::OTHER_COLUMNS << " fields " <<
- "are not allowed inside embedded messages)";
- },
- [&] (NTi::TTypePtr& type) {
- structMembers.emplace_back(std::move(member.Name), std::move(type));
- },
- }, member.TypeOrOtherColumns);
- }
- return NTi::Struct(std::move(structMembers));
- }
- TMaybe<TVector<TString>> InferColumnFilter(const ::google::protobuf::Descriptor& descriptor)
- {
- auto isOtherColumns = [] (const ::google::protobuf::FieldDescriptor& field) {
- return GetFieldOptions(&field).Type == EProtobufType::OtherColumns;
- };
- TVector<TString> result;
- result.reserve(descriptor.field_count());
- for (int i = 0; i < descriptor.field_count(); ++i) {
- const auto& field = *descriptor.field(i);
- if (isOtherColumns(field)) {
- return {};
- }
- result.push_back(GetColumnName(field));
- }
- return result;
- }
- ////////////////////////////////////////////////////////////////////////////////
- class TTableSchemaInferrer
- {
- public:
- TTableSchemaInferrer(bool keepFieldsWithoutExtension)
- : KeepFieldsWithoutExtension_(keepFieldsWithoutExtension)
- { }
- TTableSchema InferSchema(const Descriptor& messageDescriptor);
- private:
- TTypePtrOrOtherColumns GetFieldType(
- const FieldDescriptor& fieldDescriptor,
- const TProtobufFieldOptions& defaultOptions);
- void ProcessOneofField(
- TStringBuf containingFieldName,
- const OneofDescriptor& oneofDescriptor,
- const TProtobufFieldOptions& defaultFieldOptions,
- const TProtobufOneofOptions& defaultOneofOptions,
- EProtobufFieldSortOrder fieldSortOrder,
- TVector<TMember>* members);
- TVector<TMember> GetMessageMembers(
- TStringBuf containingFieldName,
- const Descriptor& fieldDescriptor,
- TProtobufFieldOptions defaultFieldOptions,
- std::optional<EProtobufFieldSortOrder> overrideFieldSortOrder = std::nullopt);
- NTi::TTypePtr GetMessageType(
- const FieldDescriptor& fieldDescriptor,
- TProtobufFieldOptions defaultFieldOptions);
- NTi::TTypePtr GetMapType(
- const FieldDescriptor& fieldDescriptor,
- const TProtobufFieldOptions& fieldOptions);
- private:
- void GetMessageMembersImpl(
- TStringBuf containingFieldName,
- const Descriptor& fieldDescriptor,
- TProtobufFieldOptions defaultFieldOptions,
- std::optional<EProtobufFieldSortOrder> overrideFieldSortOrder,
- TVector<TMember>* members);
- private:
- const bool KeepFieldsWithoutExtension_;
- TCycleChecker CycleChecker_;
- };
- void TTableSchemaInferrer::ProcessOneofField(
- TStringBuf containingFieldName,
- const OneofDescriptor& oneofDescriptor,
- const TProtobufFieldOptions& defaultFieldOptions,
- const TProtobufOneofOptions& defaultOneofOptions,
- EProtobufFieldSortOrder fieldSortOrder,
- TVector<TMember>* members)
- {
- auto oneofOptions = GetOneofOptions(&oneofDescriptor, defaultOneofOptions);
- auto addFields = [&] (TVector<TMember>* members, bool removeOptionality) {
- TVector<const FieldDescriptor*> fieldDescriptors;
- for (int i = 0; i < oneofDescriptor.field_count(); ++i) {
- fieldDescriptors.push_back(oneofDescriptor.field(i));
- }
- SortFields(fieldDescriptors, fieldSortOrder);
- for (auto innerFieldDescriptor : fieldDescriptors) {
- auto typeOrOtherColumns = GetFieldType(
- *innerFieldDescriptor,
- defaultFieldOptions);
- if (auto* maybeType = std::get_if<NTi::TTypePtr>(&typeOrOtherColumns);
- maybeType && removeOptionality && (*maybeType)->IsOptional())
- {
- typeOrOtherColumns = (*maybeType)->AsOptional()->GetItemType();
- }
- members->push_back(TMember{
- GetColumnName(*innerFieldDescriptor),
- std::move(typeOrOtherColumns),
- });
- }
- };
- switch (oneofOptions.Mode) {
- case EProtobufOneofMode::SeparateFields:
- addFields(members, /* removeOptionality */ false);
- return;
- case EProtobufOneofMode::Variant: {
- TVector<TMember> variantMembers;
- addFields(&variantMembers, /* removeOptionality */ true);
- members->push_back(TMember{
- oneofOptions.VariantFieldName,
- NTi::Optional(
- NTi::Variant(
- CreateStruct(containingFieldName, std::move(variantMembers))
- )
- )
- });
- return;
- }
- }
- Y_ABORT();
- }
- TVector<TMember> TTableSchemaInferrer::GetMessageMembers(
- TStringBuf containingFieldName,
- const Descriptor& messageDescriptor,
- TProtobufFieldOptions defaultFieldOptions,
- std::optional<EProtobufFieldSortOrder> overrideFieldSortOrder)
- {
- TVector<TMember> members;
- GetMessageMembersImpl(
- containingFieldName,
- messageDescriptor,
- defaultFieldOptions,
- overrideFieldSortOrder,
- &members
- );
- return members;
- }
- void TTableSchemaInferrer::GetMessageMembersImpl(
- TStringBuf containingFieldName,
- const Descriptor& messageDescriptor,
- TProtobufFieldOptions defaultFieldOptions,
- std::optional<EProtobufFieldSortOrder> overrideFieldSortOrder,
- TVector<TMember>* members)
- {
- auto guard = CycleChecker_.Enter(&messageDescriptor);
- defaultFieldOptions = GetDefaultFieldOptions(&messageDescriptor, defaultFieldOptions);
- auto messageOptions = GetMessageOptions(&messageDescriptor);
- auto defaultOneofOptions = GetDefaultOneofOptions(&messageDescriptor);
- TVector<const FieldDescriptor*> fieldDescriptors;
- fieldDescriptors.reserve(messageDescriptor.field_count());
- for (int i = 0; i < messageDescriptor.field_count(); ++i) {
- if (!KeepFieldsWithoutExtension_ && !HasNameExtension(*messageDescriptor.field(i))) {
- continue;
- }
- fieldDescriptors.push_back(messageDescriptor.field(i));
- }
- auto fieldSortOrder = overrideFieldSortOrder.value_or(messageOptions.FieldSortOrder);
- SortFields(fieldDescriptors, fieldSortOrder);
- THashSet<const OneofDescriptor*> visitedOneofs;
- for (const auto innerFieldDescriptor : fieldDescriptors) {
- auto oneofDescriptor = innerFieldDescriptor->containing_oneof();
- if (oneofDescriptor) {
- if (visitedOneofs.contains(oneofDescriptor)) {
- continue;
- }
- ProcessOneofField(
- containingFieldName,
- *oneofDescriptor,
- defaultFieldOptions,
- defaultOneofOptions,
- messageOptions.FieldSortOrder,
- members);
- visitedOneofs.insert(oneofDescriptor);
- continue;
- }
- auto fieldOptions = GetFieldOptions(innerFieldDescriptor, defaultFieldOptions);
- if (fieldOptions.SerializationMode == EProtobufSerializationMode::Embedded) {
- Y_ENSURE(innerFieldDescriptor->type() == FieldDescriptor::TYPE_MESSAGE,
- "EMBEDDED column must have message type");
- Y_ENSURE(innerFieldDescriptor->label() == FieldDescriptor::LABEL_REQUIRED,
- "EMBEDDED column must be marked required");
- GetMessageMembersImpl(
- innerFieldDescriptor->full_name(),
- *innerFieldDescriptor->message_type(),
- defaultFieldOptions,
- /*overrideFieldSortOrder*/ std::nullopt,
- members);
- } else {
- auto typeOrOtherColumns = GetFieldType(
- *innerFieldDescriptor,
- defaultFieldOptions);
- members->push_back(TMember{
- GetColumnName(*innerFieldDescriptor),
- std::move(typeOrOtherColumns),
- });
- }
- }
- }
- NTi::TTypePtr TTableSchemaInferrer::GetMessageType(
- const FieldDescriptor& fieldDescriptor,
- TProtobufFieldOptions defaultFieldOptions)
- {
- Y_ABORT_UNLESS(fieldDescriptor.message_type());
- const auto& messageDescriptor = *fieldDescriptor.message_type();
- auto members = GetMessageMembers(
- fieldDescriptor.full_name(),
- messageDescriptor,
- defaultFieldOptions);
- return CreateStruct(fieldDescriptor.full_name(), std::move(members));
- }
- NTi::TTypePtr TTableSchemaInferrer::GetMapType(
- const FieldDescriptor& fieldDescriptor,
- const TProtobufFieldOptions& fieldOptions)
- {
- Y_ABORT_UNLESS(fieldDescriptor.is_map());
- switch (fieldOptions.MapMode) {
- case EProtobufMapMode::ListOfStructsLegacy:
- case EProtobufMapMode::ListOfStructs: {
- TProtobufFieldOptions embeddedOptions;
- if (fieldOptions.MapMode == EProtobufMapMode::ListOfStructs) {
- embeddedOptions.SerializationMode = EProtobufSerializationMode::Yt;
- }
- auto list = NTi::List(GetMessageType(fieldDescriptor, embeddedOptions));
- switch (fieldOptions.ListMode) {
- case EProtobufListMode::Required:
- return list;
- case EProtobufListMode::Optional:
- return NTi::Optional(std::move(list));
- }
- Y_ABORT();
- }
- case EProtobufMapMode::Dict:
- case EProtobufMapMode::OptionalDict: {
- auto message = fieldDescriptor.message_type();
- Y_ABORT_UNLESS(message->field_count() == 2);
- auto keyVariant = GetScalarFieldType(*message->field(0), TProtobufFieldOptions{});
- Y_ABORT_UNLESS(std::holds_alternative<EValueType>(keyVariant));
- auto key = std::get<EValueType>(keyVariant);
- TProtobufFieldOptions embeddedOptions;
- embeddedOptions.SerializationMode = EProtobufSerializationMode::Yt;
- auto valueVariant = GetFieldType(*message->field(1), embeddedOptions);
- Y_ABORT_UNLESS(std::holds_alternative<NTi::TTypePtr>(valueVariant));
- auto value = std::get<NTi::TTypePtr>(valueVariant);
- Y_ABORT_UNLESS(value->IsOptional());
- value = value->AsOptional()->GetItemType();
- auto dict = NTi::Dict(ToTypeV3(key, true), value);
- if (fieldOptions.MapMode == EProtobufMapMode::OptionalDict) {
- return NTi::Optional(dict);
- } else {
- return dict;
- }
- }
- }
- }
- TTypePtrOrOtherColumns TTableSchemaInferrer::GetFieldType(
- const FieldDescriptor& fieldDescriptor,
- const TProtobufFieldOptions& defaultOptions)
- {
- auto fieldOptions = GetFieldOptions(&fieldDescriptor, defaultOptions);
- if (fieldOptions.Type) {
- ValidateProtobufType(fieldDescriptor, *fieldOptions.Type);
- }
- auto getScalarType = [&] {
- auto valueTypeOrOtherColumns = GetScalarFieldType(fieldDescriptor, fieldOptions);
- return std::visit(TOverloaded{
- [] (TOtherColumns) -> TTypePtrOrOtherColumns {
- return TOtherColumns{};
- },
- [] (EValueType valueType) -> TTypePtrOrOtherColumns {
- return ToTypeV3(valueType, true);
- }
- }, valueTypeOrOtherColumns);
- };
- auto withFieldLabel = [&] (const TTypePtrOrOtherColumns& typeOrOtherColumns) -> TTypePtrOrOtherColumns {
- switch (fieldDescriptor.label()) {
- case FieldDescriptor::Label::LABEL_REPEATED: {
- Y_ENSURE(fieldOptions.SerializationMode == EProtobufSerializationMode::Yt,
- "Repeated fields are supported only for YT serialization mode, field \"" + fieldDescriptor.full_name() +
- "\" has incorrect serialization mode");
- auto* type = std::get_if<NTi::TTypePtr>(&typeOrOtherColumns);
- Y_ENSURE(type, "OTHER_COLUMNS field can not be repeated");
- switch (fieldOptions.ListMode) {
- case EProtobufListMode::Required:
- return NTi::TTypePtr(NTi::List(*type));
- case EProtobufListMode::Optional:
- return NTi::TTypePtr(NTi::Optional(NTi::List(*type)));
- }
- Y_ABORT();
- }
- case FieldDescriptor::Label::LABEL_OPTIONAL:
- return std::visit(TOverloaded{
- [] (TOtherColumns) -> TTypePtrOrOtherColumns {
- return TOtherColumns{};
- },
- [] (NTi::TTypePtr type) -> TTypePtrOrOtherColumns {
- return NTi::TTypePtr(NTi::Optional(std::move(type)));
- }
- }, typeOrOtherColumns);
- case FieldDescriptor::LABEL_REQUIRED: {
- auto* type = std::get_if<NTi::TTypePtr>(&typeOrOtherColumns);
- Y_ENSURE(type, "OTHER_COLUMNS field can not be required");
- return *type;
- }
- }
- Y_ABORT();
- };
- switch (fieldOptions.SerializationMode) {
- case EProtobufSerializationMode::Protobuf:
- return withFieldLabel(getScalarType());
- case EProtobufSerializationMode::Yt:
- if (fieldDescriptor.type() == FieldDescriptor::TYPE_MESSAGE) {
- if (fieldDescriptor.is_map()) {
- return GetMapType(fieldDescriptor, fieldOptions);
- } else {
- return withFieldLabel(GetMessageType(fieldDescriptor, TProtobufFieldOptions{}));
- }
- } else {
- return withFieldLabel(getScalarType());
- }
- case EProtobufSerializationMode::Embedded:
- ythrow yexception() << "EMBEDDED field is not allowed for field "
- << fieldDescriptor.full_name();
- }
- Y_ABORT();
- }
- TTableSchema TTableSchemaInferrer::InferSchema(const Descriptor& messageDescriptor)
- {
- TTableSchema result;
- auto defaultFieldOptions = GetDefaultFieldOptions(&messageDescriptor);
- auto members = GetMessageMembers(
- messageDescriptor.full_name(),
- messageDescriptor,
- defaultFieldOptions,
- // Use special sort order for top level messages.
- /*overrideFieldSortOrder*/ EProtobufFieldSortOrder::AsInProtoFile);
- for (auto& member : members) {
- std::visit(TOverloaded{
- [&] (TOtherColumns) {
- result.Strict(false);
- },
- [&] (NTi::TTypePtr& type) {
- result.AddColumn(TColumnSchema()
- .Name(std::move(member.Name))
- .Type(std::move(type))
- );
- },
- }, member.TypeOrOtherColumns);
- }
- return result;
- }
- TTableSchema CreateTableSchemaImpl(
- const Descriptor& messageDescriptor,
- bool keepFieldsWithoutExtension)
- {
- TTableSchemaInferrer inferrer(keepFieldsWithoutExtension);
- return inferrer.InferSchema(messageDescriptor);
- }
- ////////////////////////////////////////////////////////////////////////////////
- } // namespace NYT::NDetail
- ////////////////////////////////////////////////////////////////////////////////
- template <>
- void Out<NYT::EWrapperFieldFlag::Enum>(IOutputStream& stream, NYT::EWrapperFieldFlag::Enum value)
- {
- stream << NYT::EWrapperFieldFlag_Enum_Name(value);
- }
- template <>
- void Out<NYT::EWrapperMessageFlag::Enum>(IOutputStream& stream, NYT::EWrapperMessageFlag::Enum value)
- {
- stream << NYT::EWrapperMessageFlag_Enum_Name(value);
- }
- template <>
- void Out<NYT::EWrapperOneofFlag::Enum>(IOutputStream& stream, NYT::EWrapperOneofFlag::Enum value)
- {
- stream << NYT::EWrapperOneofFlag_Enum_Name(value);
- }
|