1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498 |
- #include "protobuf_format.h"
- #include "errors.h"
- #include <yt/yt_proto/yt/formats/extension.pb.h>
- #include <google/protobuf/text_format.h>
- #include <library/cpp/yson/node/node_io.h>
- #include <util/generic/hash_set.h>
- #include <util/generic/stack.h>
- #include <util/generic/overloaded.h>
- #include <util/stream/output.h>
- #include <util/stream/file.h>
- namespace NYT::NDetail {
- using ::google::protobuf::Descriptor;
- using ::google::protobuf::DescriptorProto;
- using ::google::protobuf::EnumDescriptor;
- using ::google::protobuf::EnumDescriptorProto;
- using ::google::protobuf::FieldDescriptor;
- using ::google::protobuf::FieldDescriptorProto;
- using ::google::protobuf::OneofDescriptor;
- using ::google::protobuf::Message;
- using ::google::protobuf::FileDescriptor;
- using ::google::protobuf::FileDescriptorProto;
- using ::google::protobuf::FileDescriptorSet;
- using ::google::protobuf::FieldOptions;
- using ::google::protobuf::FileOptions;
- using ::google::protobuf::OneofOptions;
- using ::google::protobuf::MessageOptions;
- using ::ToString;
- namespace {
- ////////////////////////////////////////////////////////////////////////////////
- using TOneofOption = std::variant<
- EProtobufOneofMode>;
- using TFieldOption = std::variant<
- EProtobufType,
- EProtobufSerializationMode,
- EProtobufListMode,
- EProtobufMapMode,
- EProtobufEnumWritingMode>;
- using TMessageOption = std::variant<
- EProtobufFieldSortOrder>;
- struct TOtherColumns
- { };
- using TValueTypeOrOtherColumns = std::variant<EValueType, TOtherColumns>;
- ////////////////////////////////////////////////////////////////////////////////
- TFieldOption FieldFlagToOption(EWrapperFieldFlag::Enum flag)
- {
- using EFlag = EWrapperFieldFlag;
- switch (flag) {
- case EFlag::SERIALIZATION_PROTOBUF:
- return EProtobufSerializationMode::Protobuf;
- case EFlag::SERIALIZATION_YT:
- return EProtobufSerializationMode::Yt;
- case EFlag::ANY:
- return EProtobufType::Any;
- case EFlag::OTHER_COLUMNS:
- return EProtobufType::OtherColumns;
- case EFlag::ENUM_INT:
- return EProtobufType::EnumInt;
- case EFlag::ENUM_STRING:
- return EProtobufType::EnumString;
- case EFlag::OPTIONAL_LIST:
- return EProtobufListMode::Optional;
- case EFlag::REQUIRED_LIST:
- return EProtobufListMode::Required;
- case EFlag::MAP_AS_LIST_OF_STRUCTS_LEGACY:
- return EProtobufMapMode::ListOfStructsLegacy;
- case EFlag::MAP_AS_LIST_OF_STRUCTS:
- return EProtobufMapMode::ListOfStructs;
- case EFlag::MAP_AS_DICT:
- return EProtobufMapMode::Dict;
- case EFlag::MAP_AS_OPTIONAL_DICT:
- return EProtobufMapMode::OptionalDict;
- case EFlag::EMBEDDED:
- return EProtobufSerializationMode::Embedded;
- case EFlag::ENUM_SKIP_UNKNOWN_VALUES:
- return EProtobufEnumWritingMode::SkipUnknownValues;
- case EFlag::ENUM_CHECK_VALUES:
- return EProtobufEnumWritingMode::CheckValues;
- }
- Y_ABORT();
- }
- TMessageOption MessageFlagToOption(EWrapperMessageFlag::Enum flag)
- {
- using EFlag = EWrapperMessageFlag;
- switch (flag) {
- case EFlag::DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE:
- return EProtobufFieldSortOrder::AsInProtoFile;
- case EFlag::SORT_FIELDS_BY_FIELD_NUMBER:
- return EProtobufFieldSortOrder::ByFieldNumber;
- }
- Y_ABORT();
- }
- TOneofOption OneofFlagToOption(EWrapperOneofFlag::Enum flag)
- {
- using EFlag = EWrapperOneofFlag;
- switch (flag) {
- case EFlag::SEPARATE_FIELDS:
- return EProtobufOneofMode::SeparateFields;
- case EFlag::VARIANT:
- return EProtobufOneofMode::Variant;
- }
- Y_ABORT();
- }
- EWrapperFieldFlag::Enum OptionToFieldFlag(TFieldOption option)
- {
- using EFlag = EWrapperFieldFlag;
- struct TVisitor
- {
- EFlag::Enum operator() (EProtobufType type)
- {
- switch (type) {
- case EProtobufType::Any:
- return EFlag::ANY;
- case EProtobufType::OtherColumns:
- return EFlag::OTHER_COLUMNS;
- case EProtobufType::EnumInt:
- return EFlag::ENUM_INT;
- case EProtobufType::EnumString:
- return EFlag::ENUM_STRING;
- }
- Y_ABORT();
- }
- EFlag::Enum operator() (EProtobufSerializationMode serializationMode)
- {
- switch (serializationMode) {
- case EProtobufSerializationMode::Yt:
- return EFlag::SERIALIZATION_YT;
- case EProtobufSerializationMode::Protobuf:
- return EFlag::SERIALIZATION_PROTOBUF;
- case EProtobufSerializationMode::Embedded:
- return EFlag::EMBEDDED;
- }
- Y_ABORT();
- }
- EFlag::Enum operator() (EProtobufListMode listMode)
- {
- switch (listMode) {
- case EProtobufListMode::Optional:
- return EFlag::OPTIONAL_LIST;
- case EProtobufListMode::Required:
- return EFlag::REQUIRED_LIST;
- }
- Y_ABORT();
- }
- EFlag::Enum operator() (EProtobufMapMode mapMode)
- {
- switch (mapMode) {
- case EProtobufMapMode::ListOfStructsLegacy:
- return EFlag::MAP_AS_LIST_OF_STRUCTS_LEGACY;
- case EProtobufMapMode::ListOfStructs:
- return EFlag::MAP_AS_LIST_OF_STRUCTS;
- case EProtobufMapMode::Dict:
- return EFlag::MAP_AS_DICT;
- case EProtobufMapMode::OptionalDict:
- return EFlag::MAP_AS_OPTIONAL_DICT;
- }
- Y_ABORT();
- }
- EFlag::Enum operator() (EProtobufEnumWritingMode enumWritingMode)
- {
- switch (enumWritingMode) {
- case EProtobufEnumWritingMode::SkipUnknownValues:
- return EFlag::ENUM_SKIP_UNKNOWN_VALUES;
- case EProtobufEnumWritingMode::CheckValues:
- return EFlag::ENUM_CHECK_VALUES;
- }
- Y_ABORT();
- }
- };
- return std::visit(TVisitor(), option);
- }
- EWrapperMessageFlag::Enum OptionToMessageFlag(TMessageOption option)
- {
- using EFlag = EWrapperMessageFlag;
- struct TVisitor
- {
- EFlag::Enum operator() (EProtobufFieldSortOrder sortOrder)
- {
- switch (sortOrder) {
- case EProtobufFieldSortOrder::AsInProtoFile:
- return EFlag::DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE;
- case EProtobufFieldSortOrder::ByFieldNumber:
- return EFlag::SORT_FIELDS_BY_FIELD_NUMBER;
- }
- Y_ABORT();
- }
- };
- return std::visit(TVisitor(), option);
- }
- EWrapperOneofFlag::Enum OptionToOneofFlag(TOneofOption option)
- {
- using EFlag = EWrapperOneofFlag;
- struct TVisitor
- {
- EFlag::Enum operator() (EProtobufOneofMode mode)
- {
- switch (mode) {
- case EProtobufOneofMode::SeparateFields:
- return EFlag::SEPARATE_FIELDS;
- case EProtobufOneofMode::Variant:
- return EFlag::VARIANT;
- }
- Y_ABORT();
- }
- };
- return std::visit(TVisitor(), option);
- }
- template <typename T, typename TOptionToFlag>
- void SetOption(TMaybe<T>& option, T newOption, TOptionToFlag optionToFlag)
- {
- if (option) {
- if (*option == newOption) {
- ythrow yexception() << "Duplicate protobuf flag " << optionToFlag(newOption);
- } else {
- ythrow yexception() << "Incompatible protobuf flags " <<
- optionToFlag(*option) << " and " << optionToFlag(newOption);
- }
- }
- option = newOption;
- }
- class TParseProtobufFieldOptionsVisitor
- {
- public:
- void operator() (EProtobufType type)
- {
- SetOption(Type, type);
- }
- void operator() (EProtobufSerializationMode serializationMode)
- {
- SetOption(SerializationMode, serializationMode);
- }
- void operator() (EProtobufListMode listMode)
- {
- SetOption(ListMode, listMode);
- }
- void operator() (EProtobufMapMode mapMode)
- {
- SetOption(MapMode, mapMode);
- }
- void operator() (EProtobufEnumWritingMode enumWritingMode)
- {
- SetOption(EnumWritingMode, enumWritingMode);
- }
- template <typename T>
- void SetOption(TMaybe<T>& option, T newOption)
- {
- NYT::NDetail::SetOption(option, newOption, OptionToFieldFlag);
- }
- public:
- TMaybe<EProtobufType> Type;
- TMaybe<EProtobufSerializationMode> SerializationMode;
- TMaybe<EProtobufListMode> ListMode;
- TMaybe<EProtobufMapMode> MapMode;
- TMaybe<EProtobufEnumWritingMode> EnumWritingMode;
- };
- class TParseProtobufMessageOptionsVisitor
- {
- public:
- void operator() (EProtobufFieldSortOrder fieldSortOrder)
- {
- SetOption(FieldSortOrder, fieldSortOrder);
- }
- template <typename T>
- void SetOption(TMaybe<T>& option, T newOption)
- {
- NYT::NDetail::SetOption(option, newOption, OptionToMessageFlag);
- }
- public:
- TMaybe<EProtobufFieldSortOrder> FieldSortOrder;
- };
- class TParseProtobufOneofOptionsVisitor
- {
- public:
- void operator() (EProtobufOneofMode mode)
- {
- SetOption(Mode, mode);
- }
- template <typename T>
- void SetOption(TMaybe<T>& option, T newOption)
- {
- NYT::NDetail::SetOption(option, newOption, OptionToOneofFlag);
- }
- public:
- TMaybe<EProtobufOneofMode> Mode;
- };
- void ParseProtobufFieldOptions(
- const ::google::protobuf::RepeatedField<EWrapperFieldFlag::Enum>& flags,
- TProtobufFieldOptions* fieldOptions)
- {
- TParseProtobufFieldOptionsVisitor visitor;
- for (auto flag : flags) {
- std::visit(visitor, FieldFlagToOption(flag));
- }
- if (visitor.Type) {
- fieldOptions->Type = *visitor.Type;
- }
- if (visitor.SerializationMode) {
- fieldOptions->SerializationMode = *visitor.SerializationMode;
- }
- if (visitor.ListMode) {
- fieldOptions->ListMode = *visitor.ListMode;
- }
- if (visitor.MapMode) {
- fieldOptions->MapMode = *visitor.MapMode;
- }
- }
- void ParseProtobufMessageOptions(
- const ::google::protobuf::RepeatedField<EWrapperMessageFlag::Enum>& flags,
- TProtobufMessageOptions* messageOptions)
- {
- TParseProtobufMessageOptionsVisitor visitor;
- for (auto flag : flags) {
- std::visit(visitor, MessageFlagToOption(flag));
- }
- if (visitor.FieldSortOrder) {
- messageOptions->FieldSortOrder = *visitor.FieldSortOrder;
- }
- }
- void ParseProtobufOneofOptions(
- const ::google::protobuf::RepeatedField<EWrapperOneofFlag::Enum>& flags,
- TProtobufOneofOptions* messageOptions)
- {
- TParseProtobufOneofOptionsVisitor visitor;
- for (auto flag : flags) {
- std::visit(visitor, OneofFlagToOption(flag));
- }
- if (visitor.Mode) {
- messageOptions->Mode = *visitor.Mode;
- }
- }
- TProtobufFieldOptions GetDefaultFieldOptions(
- const Descriptor* descriptor,
- TProtobufFieldOptions defaultFieldOptions = {})
- {
- ParseProtobufFieldOptions(
- descriptor->file()->options().GetRepeatedExtension(file_default_field_flags),
- &defaultFieldOptions);
- ParseProtobufFieldOptions(
- descriptor->options().GetRepeatedExtension(default_field_flags),
- &defaultFieldOptions);
- return defaultFieldOptions;
- }
- TProtobufOneofOptions GetDefaultOneofOptions(const Descriptor* descriptor)
- {
- TProtobufOneofOptions defaultOneofOptions;
- ParseProtobufOneofOptions(
- descriptor->file()->options().GetRepeatedExtension(file_default_oneof_flags),
- &defaultOneofOptions);
- ParseProtobufOneofOptions(
- descriptor->options().GetRepeatedExtension(default_oneof_flags),
- &defaultOneofOptions);
- switch (defaultOneofOptions.Mode) {
- case EProtobufOneofMode::Variant: {
- auto defaultFieldOptions = GetDefaultFieldOptions(descriptor);
- switch (defaultFieldOptions.SerializationMode) {
- case EProtobufSerializationMode::Protobuf:
- // For Protobuf serialization mode default is SeparateFields.
- defaultOneofOptions.Mode = EProtobufOneofMode::SeparateFields;
- return defaultOneofOptions;
- case EProtobufSerializationMode::Yt:
- case EProtobufSerializationMode::Embedded:
- return defaultOneofOptions;
- }
- Y_ABORT();
- }
- case EProtobufOneofMode::SeparateFields:
- return defaultOneofOptions;
- }
- Y_ABORT();
- }
- ////////////////////////////////////////////////////////////////////////////////
- void ValidateProtobufType(const FieldDescriptor& fieldDescriptor, EProtobufType protobufType)
- {
- const auto fieldType = fieldDescriptor.type();
- auto ensureType = [&] (FieldDescriptor::Type expectedType) {
- Y_ENSURE(fieldType == expectedType,
- "Type of field " << fieldDescriptor.name() << "does not match specified field flag " <<
- OptionToFieldFlag(protobufType) << ": "
- "expected " << FieldDescriptor::TypeName(expectedType) << ", " <<
- "got " << FieldDescriptor::TypeName(fieldType));
- };
- switch (protobufType) {
- case EProtobufType::Any:
- ensureType(FieldDescriptor::TYPE_BYTES);
- return;
- case EProtobufType::OtherColumns:
- ensureType(FieldDescriptor::TYPE_BYTES);
- return;
- case EProtobufType::EnumInt:
- ensureType(FieldDescriptor::TYPE_ENUM);
- return;
- case EProtobufType::EnumString:
- ensureType(FieldDescriptor::TYPE_ENUM);
- return;
- }
- Y_ABORT();
- }
- ////////////////////////////////////////////////////////////////////////////////
- class TCycleChecker
- {
- private:
- class TGuard
- {
- public:
- TGuard(TCycleChecker* checker, const Descriptor* descriptor)
- : Checker_(checker)
- , Descriptor_(descriptor)
- {
- Checker_->ActiveVertices_.insert(Descriptor_);
- Checker_->Stack_.push(Descriptor_);
- }
- ~TGuard()
- {
- Checker_->ActiveVertices_.erase(Descriptor_);
- Checker_->Stack_.pop();
- }
- private:
- TCycleChecker* Checker_;
- const Descriptor* Descriptor_;
- };
- public:
- [[nodiscard]] TGuard Enter(const Descriptor* descriptor)
- {
- if (ActiveVertices_.contains(descriptor)) {
- Y_ABORT_UNLESS(!Stack_.empty());
- ythrow TApiUsageError() << "Cyclic reference found for protobuf messages. " <<
- "Consider removing " << EWrapperFieldFlag::SERIALIZATION_YT << " flag " <<
- "somewhere on the cycle containing " <<
- Stack_.top()->full_name() << " and " << descriptor->full_name();
- }
- return TGuard(this, descriptor);
- }
- private:
- THashSet<const Descriptor*> ActiveVertices_;
- TStack<const Descriptor*> Stack_;
- };
- ////////////////////////////////////////////////////////////////////////////////
- } // namespace
- ////////////////////////////////////////////////////////////////////////////////
- TProtobufFieldOptions GetFieldOptions(
- const FieldDescriptor* fieldDescriptor,
- const TMaybe<TProtobufFieldOptions>& defaultFieldOptions)
- {
- TProtobufFieldOptions options;
- if (defaultFieldOptions) {
- options = *defaultFieldOptions;
- } else {
- options = GetDefaultFieldOptions(fieldDescriptor->containing_type());
- }
- ParseProtobufFieldOptions(fieldDescriptor->options().GetRepeatedExtension(flags), &options);
- return options;
- }
- TProtobufOneofOptions GetOneofOptions(
- const OneofDescriptor* oneofDescriptor,
- const TMaybe<TProtobufOneofOptions>& defaultOneofOptions)
- {
- TProtobufOneofOptions options;
- if (defaultOneofOptions) {
- options = *defaultOneofOptions;
- } else {
- options = GetDefaultOneofOptions(oneofDescriptor->containing_type());
- }
- ParseProtobufOneofOptions(oneofDescriptor->options().GetRepeatedExtension(oneof_flags), &options);
- if (oneofDescriptor->is_synthetic()) {
- options.Mode = EProtobufOneofMode::SeparateFields;
- }
- auto variantFieldName = oneofDescriptor->options().GetExtension(variant_field_name);
- switch (options.Mode) {
- case EProtobufOneofMode::SeparateFields:
- if (variantFieldName) {
- ythrow TApiUsageError() << "\"variant_field_name\" requires (NYT.oneof_flags) = VARIANT";
- }
- break;
- case EProtobufOneofMode::Variant:
- if (variantFieldName) {
- options.VariantFieldName = variantFieldName;
- } else {
- options.VariantFieldName = oneofDescriptor->name();
- }
- break;
- }
- return options;
- }
- TProtobufMessageOptions GetMessageOptions(const Descriptor* descriptor)
- {
- TProtobufMessageOptions options;
- ParseProtobufMessageOptions(
- descriptor->file()->options().GetRepeatedExtension(file_default_message_flags),
- &options);
- ParseProtobufMessageOptions(
- descriptor->options().GetRepeatedExtension(message_flags),
- &options);
- return options;
- }
- TNode MakeEnumerationConfig(const ::google::protobuf::EnumDescriptor* enumDescriptor)
- {
- auto config = TNode::CreateMap();
- for (int i = 0; i < enumDescriptor->value_count(); ++i) {
- config[enumDescriptor->value(i)->name()] = enumDescriptor->value(i)->number();
- }
- return config;
- }
- TString DeduceProtobufType(
- const FieldDescriptor* fieldDescriptor,
- const TProtobufFieldOptions& options)
- {
- if (options.Type) {
- ValidateProtobufType(*fieldDescriptor, *options.Type);
- return ToString(*options.Type);
- }
- switch (fieldDescriptor->type()) {
- case FieldDescriptor::TYPE_ENUM:
- return ToString(EProtobufType::EnumString);
- case FieldDescriptor::TYPE_MESSAGE:
- switch (options.SerializationMode) {
- case EProtobufSerializationMode::Protobuf:
- return "message";
- case EProtobufSerializationMode::Yt:
- return "structured_message";
- case EProtobufSerializationMode::Embedded:
- return "embedded_message";
- }
- Y_ABORT();
- default:
- return fieldDescriptor->type_name();
- }
- Y_ABORT();
- }
- TString GetColumnName(const ::google::protobuf::FieldDescriptor& field)
- {
- const auto& options = field.options();
- const auto columnName = options.GetExtension(column_name);
- if (!columnName.empty()) {
- return columnName;
- }
- const auto keyColumnName = options.GetExtension(key_column_name);
- if (!keyColumnName.empty()) {
- return keyColumnName;
- }
- return field.name();
- }
- TNode MakeProtoFormatMessageFieldsConfig(
- const Descriptor* descriptor,
- TNode* enumerations,
- TCycleChecker& cycleChecker);
- TNode MakeProtoFormatMessageFieldsConfig(
- const Descriptor* descriptor,
- TNode* enumerations,
- const TProtobufFieldOptions& defaultFieldOptions,
- const TProtobufOneofOptions& defaultOneofOptions,
- TCycleChecker& cycleChecker);
- TNode MakeMapFieldsConfig(
- const FieldDescriptor* fieldDescriptor,
- TNode* enumerations,
- const TProtobufFieldOptions& fieldOptions,
- TCycleChecker& cycleChecker)
- {
- Y_ABORT_UNLESS(fieldDescriptor->is_map());
- auto message = fieldDescriptor->message_type();
- switch (fieldOptions.MapMode) {
- case EProtobufMapMode::ListOfStructsLegacy:
- return MakeProtoFormatMessageFieldsConfig(
- message,
- enumerations,
- cycleChecker);
- case EProtobufMapMode::ListOfStructs:
- case EProtobufMapMode::Dict:
- case EProtobufMapMode::OptionalDict: {
- TProtobufFieldOptions defaultFieldOptions;
- defaultFieldOptions.SerializationMode = EProtobufSerializationMode::Yt;
- return MakeProtoFormatMessageFieldsConfig(
- message,
- enumerations,
- defaultFieldOptions,
- TProtobufOneofOptions{},
- cycleChecker);
- }
- }
- Y_ABORT();
- }
- TNode MakeProtoFormatFieldConfig(
- const FieldDescriptor* fieldDescriptor,
- TNode* enumerations,
- const TProtobufFieldOptions& defaultOptions,
- TCycleChecker& cycleChecker)
- {
- auto fieldConfig = TNode::CreateMap();
- fieldConfig["field_number"] = fieldDescriptor->number();
- fieldConfig["name"] = GetColumnName(*fieldDescriptor);
- auto fieldOptions = GetFieldOptions(fieldDescriptor, defaultOptions);
- Y_ENSURE(fieldOptions.SerializationMode != EProtobufSerializationMode::Embedded,
- "EMBEDDED flag is currently supported only with "
- "ProtobufFormatWithDescriptors config option set to true");
- if (fieldDescriptor->is_repeated()) {
- Y_ENSURE_EX(fieldOptions.SerializationMode == EProtobufSerializationMode::Yt,
- TApiUsageError() << "Repeated field \"" << fieldDescriptor->full_name() << "\" " <<
- "must have flag \"" << EWrapperFieldFlag::SERIALIZATION_YT << "\"");
- }
- fieldConfig["repeated"] = fieldDescriptor->is_repeated();
- fieldConfig["packed"] = fieldDescriptor->is_packed();
- fieldConfig["proto_type"] = DeduceProtobufType(fieldDescriptor, fieldOptions);
- if (fieldDescriptor->type() == FieldDescriptor::TYPE_ENUM) {
- auto* enumeration = fieldDescriptor->enum_type();
- (*enumerations)[enumeration->full_name()] = MakeEnumerationConfig(enumeration);
- fieldConfig["enumeration_name"] = enumeration->full_name();
- }
- if (fieldOptions.SerializationMode != EProtobufSerializationMode::Yt) {
- return fieldConfig;
- }
- if (fieldDescriptor->is_map()) {
- fieldConfig["fields"] = MakeMapFieldsConfig(fieldDescriptor, enumerations, fieldOptions, cycleChecker);
- return fieldConfig;
- }
- if (fieldDescriptor->type() == FieldDescriptor::TYPE_MESSAGE) {
- fieldConfig["fields"] = MakeProtoFormatMessageFieldsConfig(
- fieldDescriptor->message_type(),
- enumerations,
- cycleChecker);
- }
- return fieldConfig;
- }
- void MakeProtoFormatOneofConfig(
- const OneofDescriptor* oneofDescriptor,
- TNode* enumerations,
- const TProtobufFieldOptions& defaultFieldOptions,
- const TProtobufOneofOptions& defaultOneofOptions,
- TCycleChecker& cycleChecker,
- TNode* fields)
- {
- auto addFields = [&] (TNode* fields) {
- for (int i = 0; i < oneofDescriptor->field_count(); ++i) {
- fields->Add(MakeProtoFormatFieldConfig(
- oneofDescriptor->field(i),
- enumerations,
- defaultFieldOptions,
- cycleChecker));
- }
- };
- auto oneofOptions = GetOneofOptions(oneofDescriptor, defaultOneofOptions);
- switch (oneofOptions.Mode) {
- case EProtobufOneofMode::SeparateFields:
- addFields(fields);
- return;
- case EProtobufOneofMode::Variant: {
- auto oneofFields = TNode::CreateList();
- addFields(&oneofFields);
- auto oneofField = TNode()
- ("proto_type", "oneof")
- ("name", oneofOptions.VariantFieldName)
- ("fields", std::move(oneofFields));
- fields->Add(std::move(oneofField));
- return;
- }
- }
- Y_ABORT();
- }
- TNode MakeProtoFormatMessageFieldsConfig(
- const Descriptor* descriptor,
- TNode* enumerations,
- const TProtobufFieldOptions& defaultFieldOptions,
- const TProtobufOneofOptions& defaultOneofOptions,
- TCycleChecker& cycleChecker)
- {
- auto fields = TNode::CreateList();
- THashSet<const OneofDescriptor*> visitedOneofs;
- auto guard = cycleChecker.Enter(descriptor);
- for (int fieldIndex = 0; fieldIndex < descriptor->field_count(); ++fieldIndex) {
- auto fieldDescriptor = descriptor->field(fieldIndex);
- auto oneofDescriptor = fieldDescriptor->containing_oneof();
- if (!oneofDescriptor) {
- fields.Add(MakeProtoFormatFieldConfig(
- fieldDescriptor,
- enumerations,
- defaultFieldOptions,
- cycleChecker));
- } else if (!visitedOneofs.contains(oneofDescriptor)) {
- MakeProtoFormatOneofConfig(
- oneofDescriptor,
- enumerations,
- defaultFieldOptions,
- defaultOneofOptions,
- cycleChecker,
- &fields);
- visitedOneofs.insert(oneofDescriptor);
- }
- }
- return fields;
- }
- TNode MakeProtoFormatMessageFieldsConfig(
- const Descriptor* descriptor,
- TNode* enumerations,
- TCycleChecker& cycleChecker)
- {
- return MakeProtoFormatMessageFieldsConfig(
- descriptor,
- enumerations,
- GetDefaultFieldOptions(descriptor),
- GetDefaultOneofOptions(descriptor),
- cycleChecker);
- }
- TNode MakeProtoFormatConfigWithTables(const TVector<const Descriptor*>& descriptors)
- {
- TNode config("protobuf");
- config.Attributes()
- ("enumerations", TNode::CreateMap())
- ("tables", TNode::CreateList());
- auto& enumerations = config.Attributes()["enumerations"];
- for (auto* descriptor : descriptors) {
- TCycleChecker cycleChecker;
- auto columns = MakeProtoFormatMessageFieldsConfig(descriptor, &enumerations, cycleChecker);
- config.Attributes()["tables"].Add(
- TNode()("columns", std::move(columns)));
- }
- return config;
- }
- ////////////////////////////////////////////////////////////////////////////////
- class TFileDescriptorSetBuilder
- {
- public:
- TFileDescriptorSetBuilder()
- : ExtensionFile_(EWrapperFieldFlag::descriptor()->file())
- { }
- void AddDescriptor(const Descriptor* descriptor)
- {
- auto [it, inserted] = AllDescriptors_.insert(descriptor);
- if (!inserted) {
- return;
- }
- const auto* containingType = descriptor->containing_type();
- while (containingType) {
- AddDescriptor(containingType);
- containingType = containingType->containing_type();
- }
- for (int i = 0; i < descriptor->field_count(); ++i) {
- AddField(descriptor->field(i));
- }
- }
- FileDescriptorSet Build()
- {
- THashSet<const FileDescriptor*> visitedFiles;
- TVector<const FileDescriptor*> fileTopoOrder;
- for (const auto* descriptor : AllDescriptors_) {
- TraverseDependencies(descriptor->file(), visitedFiles, fileTopoOrder);
- }
- THashSet<TString> messageTypeNames;
- THashSet<TString> enumTypeNames;
- for (const auto* descriptor : AllDescriptors_) {
- messageTypeNames.insert(descriptor->full_name());
- }
- for (const auto* enumDescriptor : EnumDescriptors_) {
- enumTypeNames.insert(enumDescriptor->full_name());
- }
- FileDescriptorSet fileDescriptorSetProto;
- for (const auto* file : fileTopoOrder) {
- auto* fileProto = fileDescriptorSetProto.add_file();
- file->CopyTo(fileProto);
- Strip(fileProto, messageTypeNames, enumTypeNames);
- }
- return fileDescriptorSetProto;
- }
- private:
- void AddField(const FieldDescriptor* fieldDescriptor)
- {
- if (fieldDescriptor->message_type()) {
- AddDescriptor(fieldDescriptor->message_type());
- }
- if (fieldDescriptor->enum_type()) {
- AddEnumDescriptor(fieldDescriptor->enum_type());
- }
- }
- void AddEnumDescriptor(const EnumDescriptor* enumDescriptor)
- {
- auto [it, inserted] = EnumDescriptors_.insert(enumDescriptor);
- if (!inserted) {
- return;
- }
- const auto* containingType = enumDescriptor->containing_type();
- while (containingType) {
- AddDescriptor(containingType);
- containingType = containingType->containing_type();
- }
- }
- void TraverseDependencies(
- const FileDescriptor* current,
- THashSet<const FileDescriptor*>& visited,
- TVector<const FileDescriptor*>& topoOrder)
- {
- auto [it, inserted] = visited.insert(current);
- if (!inserted) {
- return;
- }
- for (int i = 0; i < current->dependency_count(); ++i) {
- TraverseDependencies(current->dependency(i), visited, topoOrder);
- }
- topoOrder.push_back(current);
- }
- template <typename TOptions>
- void StripUnknownOptions(TOptions* options)
- {
- std::vector<const FieldDescriptor*> fields;
- auto reflection = options->GetReflection();
- reflection->ListFields(*options, &fields);
- for (auto field : fields) {
- if (field->is_extension() && field->file() != ExtensionFile_) {
- reflection->ClearField(options, field);
- }
- }
- }
- template <typename TRepeatedField, typename TPredicate>
- void RemoveIf(TRepeatedField* repeatedField, TPredicate predicate)
- {
- repeatedField->erase(
- std::remove_if(repeatedField->begin(), repeatedField->end(), predicate),
- repeatedField->end());
- }
- void Strip(
- const TString& containingTypePrefix,
- DescriptorProto* messageProto,
- const THashSet<TString>& messageTypeNames,
- const THashSet<TString>& enumTypeNames)
- {
- const auto prefix = containingTypePrefix + messageProto->name() + '.';
- RemoveIf(messageProto->mutable_nested_type(), [&] (const DescriptorProto& descriptorProto) {
- return !messageTypeNames.contains(prefix + descriptorProto.name());
- });
- RemoveIf(messageProto->mutable_enum_type(), [&] (const EnumDescriptorProto& enumDescriptorProto) {
- return !enumTypeNames.contains(prefix + enumDescriptorProto.name());
- });
- messageProto->clear_extension();
- StripUnknownOptions(messageProto->mutable_options());
- for (auto& fieldProto : *messageProto->mutable_field()) {
- StripUnknownOptions(fieldProto.mutable_options());
- }
- for (auto& oneofProto : *messageProto->mutable_oneof_decl()) {
- StripUnknownOptions(oneofProto.mutable_options());
- }
- for (auto& nestedTypeProto : *messageProto->mutable_nested_type()) {
- Strip(prefix, &nestedTypeProto, messageTypeNames, enumTypeNames);
- }
- for (auto& enumProto : *messageProto->mutable_enum_type()) {
- StripUnknownOptions(enumProto.mutable_options());
- for (auto& enumValue : *enumProto.mutable_value()) {
- StripUnknownOptions(enumValue.mutable_options());
- }
- }
- }
- void Strip(
- FileDescriptorProto* fileProto,
- const THashSet<TString>& messageTypeNames,
- const THashSet<TString>& enumTypeNames)
- {
- const auto prefix = fileProto->package().Empty()
- ? ""
- : fileProto->package() + '.';
- RemoveIf(fileProto->mutable_message_type(), [&] (const DescriptorProto& descriptorProto) {
- return !messageTypeNames.contains(prefix + descriptorProto.name());
- });
- RemoveIf(fileProto->mutable_enum_type(), [&] (const EnumDescriptorProto& enumDescriptorProto) {
- return !enumTypeNames.contains(prefix + enumDescriptorProto.name());
- });
- fileProto->clear_service();
- fileProto->clear_extension();
- StripUnknownOptions(fileProto->mutable_options());
- for (auto& messageProto : *fileProto->mutable_message_type()) {
- Strip(prefix, &messageProto, messageTypeNames, enumTypeNames);
- }
- for (auto& enumProto : *fileProto->mutable_enum_type()) {
- StripUnknownOptions(enumProto.mutable_options());
- for (auto& enumValue : *enumProto.mutable_value()) {
- StripUnknownOptions(enumValue.mutable_options());
- }
- }
- }
- private:
- const FileDescriptor* const ExtensionFile_;
- THashSet<const Descriptor*> AllDescriptors_;
- THashSet<const EnumDescriptor*> EnumDescriptors_;
- };
- TNode MakeProtoFormatConfigWithDescriptors(const TVector<const Descriptor*>& descriptors)
- {
- TFileDescriptorSetBuilder builder;
- auto typeNames = TNode::CreateList();
- for (const auto* descriptor : descriptors) {
- builder.AddDescriptor(descriptor);
- typeNames.Add(descriptor->full_name());
- }
- auto fileDescriptorSetText = builder.Build().ShortDebugString();
- TNode config("protobuf");
- config.Attributes()
- ("file_descriptor_set_text", std::move(fileDescriptorSetText))
- ("type_names", std::move(typeNames));
- return config;
- }
- ////////////////////////////////////////////////////////////////////////////////
- using TTypePtrOrOtherColumns = std::variant<NTi::TTypePtr, TOtherColumns>;
- struct TMember {
- TString Name;
- TTypePtrOrOtherColumns TypeOrOtherColumns;
- };
- ////////////////////////////////////////////////////////////////////////////////
- TValueTypeOrOtherColumns GetScalarFieldType(
- const FieldDescriptor& fieldDescriptor,
- const TProtobufFieldOptions& options)
- {
- if (options.Type) {
- switch (*options.Type) {
- case EProtobufType::EnumInt:
- return EValueType::VT_INT64;
- case EProtobufType::EnumString:
- return EValueType::VT_STRING;
- case EProtobufType::Any:
- return EValueType::VT_ANY;
- case EProtobufType::OtherColumns:
- return TOtherColumns{};
- }
- Y_ABORT();
- }
- switch (fieldDescriptor.cpp_type()) {
- case FieldDescriptor::CPPTYPE_INT32:
- return EValueType::VT_INT32;
- case FieldDescriptor::CPPTYPE_INT64:
- return EValueType::VT_INT64;
- case FieldDescriptor::CPPTYPE_UINT32:
- return EValueType::VT_UINT32;
- case FieldDescriptor::CPPTYPE_UINT64:
- return EValueType::VT_UINT64;
- case FieldDescriptor::CPPTYPE_FLOAT:
- case FieldDescriptor::CPPTYPE_DOUBLE:
- return EValueType::VT_DOUBLE;
- case FieldDescriptor::CPPTYPE_BOOL:
- return EValueType::VT_BOOLEAN;
- case FieldDescriptor::CPPTYPE_STRING:
- case FieldDescriptor::CPPTYPE_MESSAGE:
- case FieldDescriptor::CPPTYPE_ENUM:
- return EValueType::VT_STRING;
- default:
- ythrow yexception() <<
- "Unexpected field type '" << fieldDescriptor.cpp_type_name() << "' " <<
- "for field " << fieldDescriptor.name();
- }
- }
- bool HasNameExtension(const FieldDescriptor& fieldDescriptor)
- {
- const auto& options = fieldDescriptor.options();
- return options.HasExtension(column_name) || options.HasExtension(key_column_name);
- }
- void SortFields(TVector<const FieldDescriptor*>& fieldDescriptors, EProtobufFieldSortOrder fieldSortOrder)
- {
- switch (fieldSortOrder) {
- case EProtobufFieldSortOrder::AsInProtoFile:
- return;
- case EProtobufFieldSortOrder::ByFieldNumber:
- SortBy(fieldDescriptors, [] (const FieldDescriptor* fieldDescriptor) {
- return fieldDescriptor->number();
- });
- return;
- }
- Y_ABORT();
- }
- NTi::TTypePtr CreateStruct(TStringBuf fieldName, TVector<TMember> members)
- {
- TVector<NTi::TStructType::TOwnedMember> structMembers;
- structMembers.reserve(members.size());
- for (auto& member : members) {
- std::visit(TOverloaded{
- [&] (TOtherColumns) {
- ythrow TApiUsageError() <<
- "Could not deduce YT type for field " << member.Name << " of " <<
- "embedded message field " << fieldName << " " <<
- "(note that " << EWrapperFieldFlag::OTHER_COLUMNS << " fields " <<
- "are not allowed inside embedded messages)";
- },
- [&] (NTi::TTypePtr& type) {
- structMembers.emplace_back(std::move(member.Name), std::move(type));
- },
- }, member.TypeOrOtherColumns);
- }
- return NTi::Struct(std::move(structMembers));
- }
- TMaybe<TVector<TString>> InferColumnFilter(const ::google::protobuf::Descriptor& descriptor)
- {
- auto isOtherColumns = [] (const ::google::protobuf::FieldDescriptor& field) {
- return GetFieldOptions(&field).Type == EProtobufType::OtherColumns;
- };
- TVector<TString> result;
- result.reserve(descriptor.field_count());
- for (int i = 0; i < descriptor.field_count(); ++i) {
- const auto& field = *descriptor.field(i);
- if (isOtherColumns(field)) {
- return {};
- }
- result.push_back(GetColumnName(field));
- }
- return result;
- }
- ////////////////////////////////////////////////////////////////////////////////
- class TTableSchemaInferrer
- {
- public:
- TTableSchemaInferrer(bool keepFieldsWithoutExtension)
- : KeepFieldsWithoutExtension_(keepFieldsWithoutExtension)
- { }
- TTableSchema InferSchema(const Descriptor& messageDescriptor);
- private:
- TTypePtrOrOtherColumns GetFieldType(
- const FieldDescriptor& fieldDescriptor,
- const TProtobufFieldOptions& defaultOptions);
- void ProcessOneofField(
- TStringBuf containingFieldName,
- const OneofDescriptor& oneofDescriptor,
- const TProtobufFieldOptions& defaultFieldOptions,
- const TProtobufOneofOptions& defaultOneofOptions,
- EProtobufFieldSortOrder fieldSortOrder,
- TVector<TMember>* members);
- TVector<TMember> GetMessageMembers(
- TStringBuf containingFieldName,
- const Descriptor& fieldDescriptor,
- TProtobufFieldOptions defaultFieldOptions,
- std::optional<EProtobufFieldSortOrder> overrideFieldSortOrder = std::nullopt);
- NTi::TTypePtr GetMessageType(
- const FieldDescriptor& fieldDescriptor,
- TProtobufFieldOptions defaultFieldOptions);
- NTi::TTypePtr GetMapType(
- const FieldDescriptor& fieldDescriptor,
- const TProtobufFieldOptions& fieldOptions);
- private:
- void GetMessageMembersImpl(
- TStringBuf containingFieldName,
- const Descriptor& fieldDescriptor,
- TProtobufFieldOptions defaultFieldOptions,
- std::optional<EProtobufFieldSortOrder> overrideFieldSortOrder,
- TVector<TMember>* members);
- private:
- const bool KeepFieldsWithoutExtension_;
- TCycleChecker CycleChecker_;
- };
- void TTableSchemaInferrer::ProcessOneofField(
- TStringBuf containingFieldName,
- const OneofDescriptor& oneofDescriptor,
- const TProtobufFieldOptions& defaultFieldOptions,
- const TProtobufOneofOptions& defaultOneofOptions,
- EProtobufFieldSortOrder fieldSortOrder,
- TVector<TMember>* members)
- {
- auto oneofOptions = GetOneofOptions(&oneofDescriptor, defaultOneofOptions);
- auto addFields = [&] (TVector<TMember>* members, bool removeOptionality) {
- TVector<const FieldDescriptor*> fieldDescriptors;
- for (int i = 0; i < oneofDescriptor.field_count(); ++i) {
- fieldDescriptors.push_back(oneofDescriptor.field(i));
- }
- SortFields(fieldDescriptors, fieldSortOrder);
- for (auto innerFieldDescriptor : fieldDescriptors) {
- auto typeOrOtherColumns = GetFieldType(
- *innerFieldDescriptor,
- defaultFieldOptions);
- if (auto* maybeType = std::get_if<NTi::TTypePtr>(&typeOrOtherColumns);
- maybeType && removeOptionality && (*maybeType)->IsOptional())
- {
- typeOrOtherColumns = (*maybeType)->AsOptional()->GetItemType();
- }
- members->push_back(TMember{
- GetColumnName(*innerFieldDescriptor),
- std::move(typeOrOtherColumns),
- });
- }
- };
- switch (oneofOptions.Mode) {
- case EProtobufOneofMode::SeparateFields:
- addFields(members, /* removeOptionality */ false);
- return;
- case EProtobufOneofMode::Variant: {
- TVector<TMember> variantMembers;
- addFields(&variantMembers, /* removeOptionality */ true);
- members->push_back(TMember{
- oneofOptions.VariantFieldName,
- NTi::Optional(
- NTi::Variant(
- CreateStruct(containingFieldName, std::move(variantMembers))
- )
- )
- });
- return;
- }
- }
- Y_ABORT();
- }
- TVector<TMember> TTableSchemaInferrer::GetMessageMembers(
- TStringBuf containingFieldName,
- const Descriptor& messageDescriptor,
- TProtobufFieldOptions defaultFieldOptions,
- std::optional<EProtobufFieldSortOrder> overrideFieldSortOrder)
- {
- TVector<TMember> members;
- GetMessageMembersImpl(
- containingFieldName,
- messageDescriptor,
- defaultFieldOptions,
- overrideFieldSortOrder,
- &members
- );
- return members;
- }
- void TTableSchemaInferrer::GetMessageMembersImpl(
- TStringBuf containingFieldName,
- const Descriptor& messageDescriptor,
- TProtobufFieldOptions defaultFieldOptions,
- std::optional<EProtobufFieldSortOrder> overrideFieldSortOrder,
- TVector<TMember>* members)
- {
- auto guard = CycleChecker_.Enter(&messageDescriptor);
- defaultFieldOptions = GetDefaultFieldOptions(&messageDescriptor, defaultFieldOptions);
- auto messageOptions = GetMessageOptions(&messageDescriptor);
- auto defaultOneofOptions = GetDefaultOneofOptions(&messageDescriptor);
- TVector<const FieldDescriptor*> fieldDescriptors;
- fieldDescriptors.reserve(messageDescriptor.field_count());
- for (int i = 0; i < messageDescriptor.field_count(); ++i) {
- if (!KeepFieldsWithoutExtension_ && !HasNameExtension(*messageDescriptor.field(i))) {
- continue;
- }
- fieldDescriptors.push_back(messageDescriptor.field(i));
- }
- auto fieldSortOrder = overrideFieldSortOrder.value_or(messageOptions.FieldSortOrder);
- SortFields(fieldDescriptors, fieldSortOrder);
- THashSet<const OneofDescriptor*> visitedOneofs;
- for (const auto innerFieldDescriptor : fieldDescriptors) {
- auto oneofDescriptor = innerFieldDescriptor->containing_oneof();
- if (oneofDescriptor) {
- if (visitedOneofs.contains(oneofDescriptor)) {
- continue;
- }
- ProcessOneofField(
- containingFieldName,
- *oneofDescriptor,
- defaultFieldOptions,
- defaultOneofOptions,
- messageOptions.FieldSortOrder,
- members);
- visitedOneofs.insert(oneofDescriptor);
- continue;
- }
- auto fieldOptions = GetFieldOptions(innerFieldDescriptor, defaultFieldOptions);
- if (fieldOptions.SerializationMode == EProtobufSerializationMode::Embedded) {
- Y_ENSURE(innerFieldDescriptor->type() == FieldDescriptor::TYPE_MESSAGE,
- "EMBEDDED column must have message type");
- Y_ENSURE(innerFieldDescriptor->label() == FieldDescriptor::LABEL_REQUIRED,
- "EMBEDDED column must be marked required");
- GetMessageMembersImpl(
- innerFieldDescriptor->full_name(),
- *innerFieldDescriptor->message_type(),
- defaultFieldOptions,
- /*overrideFieldSortOrder*/ std::nullopt,
- members);
- } else {
- auto typeOrOtherColumns = GetFieldType(
- *innerFieldDescriptor,
- defaultFieldOptions);
- members->push_back(TMember{
- GetColumnName(*innerFieldDescriptor),
- std::move(typeOrOtherColumns),
- });
- }
- }
- }
- NTi::TTypePtr TTableSchemaInferrer::GetMessageType(
- const FieldDescriptor& fieldDescriptor,
- TProtobufFieldOptions defaultFieldOptions)
- {
- Y_ABORT_UNLESS(fieldDescriptor.message_type());
- const auto& messageDescriptor = *fieldDescriptor.message_type();
- auto members = GetMessageMembers(
- fieldDescriptor.full_name(),
- messageDescriptor,
- defaultFieldOptions);
- return CreateStruct(fieldDescriptor.full_name(), std::move(members));
- }
- NTi::TTypePtr TTableSchemaInferrer::GetMapType(
- const FieldDescriptor& fieldDescriptor,
- const TProtobufFieldOptions& fieldOptions)
- {
- Y_ABORT_UNLESS(fieldDescriptor.is_map());
- switch (fieldOptions.MapMode) {
- case EProtobufMapMode::ListOfStructsLegacy:
- case EProtobufMapMode::ListOfStructs: {
- TProtobufFieldOptions embeddedOptions;
- if (fieldOptions.MapMode == EProtobufMapMode::ListOfStructs) {
- embeddedOptions.SerializationMode = EProtobufSerializationMode::Yt;
- }
- auto list = NTi::List(GetMessageType(fieldDescriptor, embeddedOptions));
- switch (fieldOptions.ListMode) {
- case EProtobufListMode::Required:
- return list;
- case EProtobufListMode::Optional:
- return NTi::Optional(std::move(list));
- }
- Y_ABORT();
- }
- case EProtobufMapMode::Dict:
- case EProtobufMapMode::OptionalDict: {
- auto message = fieldDescriptor.message_type();
- Y_ABORT_UNLESS(message->field_count() == 2);
- auto keyVariant = GetScalarFieldType(*message->field(0), TProtobufFieldOptions{});
- Y_ABORT_UNLESS(std::holds_alternative<EValueType>(keyVariant));
- auto key = std::get<EValueType>(keyVariant);
- TProtobufFieldOptions embeddedOptions;
- embeddedOptions.SerializationMode = EProtobufSerializationMode::Yt;
- auto valueVariant = GetFieldType(*message->field(1), embeddedOptions);
- Y_ABORT_UNLESS(std::holds_alternative<NTi::TTypePtr>(valueVariant));
- auto value = std::get<NTi::TTypePtr>(valueVariant);
- Y_ABORT_UNLESS(value->IsOptional());
- value = value->AsOptional()->GetItemType();
- auto dict = NTi::Dict(ToTypeV3(key, true), value);
- if (fieldOptions.MapMode == EProtobufMapMode::OptionalDict) {
- return NTi::Optional(dict);
- } else {
- return dict;
- }
- }
- }
- }
- TTypePtrOrOtherColumns TTableSchemaInferrer::GetFieldType(
- const FieldDescriptor& fieldDescriptor,
- const TProtobufFieldOptions& defaultOptions)
- {
- auto fieldOptions = GetFieldOptions(&fieldDescriptor, defaultOptions);
- if (fieldOptions.Type) {
- ValidateProtobufType(fieldDescriptor, *fieldOptions.Type);
- }
- auto getScalarType = [&] {
- auto valueTypeOrOtherColumns = GetScalarFieldType(fieldDescriptor, fieldOptions);
- return std::visit(TOverloaded{
- [] (TOtherColumns) -> TTypePtrOrOtherColumns {
- return TOtherColumns{};
- },
- [] (EValueType valueType) -> TTypePtrOrOtherColumns {
- return ToTypeV3(valueType, true);
- }
- }, valueTypeOrOtherColumns);
- };
- auto withFieldLabel = [&] (const TTypePtrOrOtherColumns& typeOrOtherColumns) -> TTypePtrOrOtherColumns {
- switch (fieldDescriptor.label()) {
- case FieldDescriptor::Label::LABEL_REPEATED: {
- Y_ENSURE(fieldOptions.SerializationMode == EProtobufSerializationMode::Yt,
- "Repeated fields are supported only for YT serialization mode, field \"" + fieldDescriptor.full_name() +
- "\" has incorrect serialization mode");
- auto* type = std::get_if<NTi::TTypePtr>(&typeOrOtherColumns);
- Y_ENSURE(type, "OTHER_COLUMNS field can not be repeated");
- switch (fieldOptions.ListMode) {
- case EProtobufListMode::Required:
- return NTi::TTypePtr(NTi::List(*type));
- case EProtobufListMode::Optional:
- return NTi::TTypePtr(NTi::Optional(NTi::List(*type)));
- }
- Y_ABORT();
- }
- case FieldDescriptor::Label::LABEL_OPTIONAL:
- return std::visit(TOverloaded{
- [] (TOtherColumns) -> TTypePtrOrOtherColumns {
- return TOtherColumns{};
- },
- [] (NTi::TTypePtr type) -> TTypePtrOrOtherColumns {
- return NTi::TTypePtr(NTi::Optional(std::move(type)));
- }
- }, typeOrOtherColumns);
- case FieldDescriptor::LABEL_REQUIRED: {
- auto* type = std::get_if<NTi::TTypePtr>(&typeOrOtherColumns);
- Y_ENSURE(type, "OTHER_COLUMNS field can not be required");
- return *type;
- }
- }
- Y_ABORT();
- };
- switch (fieldOptions.SerializationMode) {
- case EProtobufSerializationMode::Protobuf:
- return withFieldLabel(getScalarType());
- case EProtobufSerializationMode::Yt:
- if (fieldDescriptor.type() == FieldDescriptor::TYPE_MESSAGE) {
- if (fieldDescriptor.is_map()) {
- return GetMapType(fieldDescriptor, fieldOptions);
- } else {
- return withFieldLabel(GetMessageType(fieldDescriptor, TProtobufFieldOptions{}));
- }
- } else {
- return withFieldLabel(getScalarType());
- }
- case EProtobufSerializationMode::Embedded:
- ythrow yexception() << "EMBEDDED field is not allowed for field "
- << fieldDescriptor.full_name();
- }
- Y_ABORT();
- }
- TTableSchema TTableSchemaInferrer::InferSchema(const Descriptor& messageDescriptor)
- {
- TTableSchema result;
- auto defaultFieldOptions = GetDefaultFieldOptions(&messageDescriptor);
- auto members = GetMessageMembers(
- messageDescriptor.full_name(),
- messageDescriptor,
- defaultFieldOptions,
- // Use special sort order for top level messages.
- /*overrideFieldSortOrder*/ EProtobufFieldSortOrder::AsInProtoFile);
- for (auto& member : members) {
- std::visit(TOverloaded{
- [&] (TOtherColumns) {
- result.Strict(false);
- },
- [&] (NTi::TTypePtr& type) {
- result.AddColumn(TColumnSchema()
- .Name(std::move(member.Name))
- .Type(std::move(type))
- );
- },
- }, member.TypeOrOtherColumns);
- }
- return result;
- }
- TTableSchema CreateTableSchemaImpl(
- const Descriptor& messageDescriptor,
- bool keepFieldsWithoutExtension)
- {
- TTableSchemaInferrer inferrer(keepFieldsWithoutExtension);
- return inferrer.InferSchema(messageDescriptor);
- }
- ////////////////////////////////////////////////////////////////////////////////
- } // namespace NYT::NDetail
- ////////////////////////////////////////////////////////////////////////////////
- template <>
- void Out<NYT::EWrapperFieldFlag::Enum>(IOutputStream& stream, NYT::EWrapperFieldFlag::Enum value)
- {
- stream << NYT::EWrapperFieldFlag_Enum_Name(value);
- }
- template <>
- void Out<NYT::EWrapperMessageFlag::Enum>(IOutputStream& stream, NYT::EWrapperMessageFlag::Enum value)
- {
- stream << NYT::EWrapperMessageFlag_Enum_Name(value);
- }
- template <>
- void Out<NYT::EWrapperOneofFlag::Enum>(IOutputStream& stream, NYT::EWrapperOneofFlag::Enum value)
- {
- stream << NYT::EWrapperOneofFlag_Enum_Name(value);
- }
|