// File-local helpers of the Yson2 UDF module (resource name "Yson2.Options").
// NOTE(review): angle-bracket content (include paths, template parameter/argument lists) appears to be
// missing throughout this text, e.g. `using TNodeResource = TResource;`, `template class TLazyConveterT`,
// `x.Get()` -- presumably an extraction artifact; confirm against the real file.
//
// This physical line declares, in order:
//  * OptionsResourceName and the resource / dict type aliases.
//  * EOptions bit flags (Strict = 1, AutoConvert = 2) and TOpts, a union overlaying the raw flag byte with
//    two 1-bit bool fields; the static_assert pins sizeof(TOpts) to 1.
//  * ParseOptions: returns a TOpts built from x.Get() when x is non-empty, otherwise a default TOpts (Raw = 0).
//  * TOptions: the "Options" callable. It declares two optional arguments named AutoConvert and Strict and
//    returns the options resource; Run ORs the matching EOptions bit into a ui8 for every argument that is
//    present and true. DeclareSignature returns false when the requested name is not "Options".
//  * TConverterPtr: pointer to a function (value, value builder, source position) -> TUnboxedValuePod.
//  * The head of TLazyConveterT, a boxed value that keeps the original container, the value builder and the
//    source position (its constructor initializer list follows on the next physical line).
#include #include #include #include #include #include #include #include #include #include #include using namespace NYql::NUdf; using namespace NYql::NDom; using namespace NYsonPull; namespace { constexpr char OptionsResourceName[] = "Yson2.Options"; using TOptionsResource = TResource; using TNodeResource = TResource; using TDictType = TDict; using TInt64DictType = TDict; using TUint64DictType = TDict; using TBoolDictType = TDict; using TDoubleDictType = TDict; using TStringDictType = TDict; enum class EOptions : ui8 { Strict = 1, AutoConvert = 2 }; union TOpts { ui8 Raw = 0; struct { bool Strict: 1; bool AutoConvert: 1; }; }; static_assert(sizeof(TOpts) == 1U, "Wrong TOpts size."); TOpts ParseOptions(TUnboxedValuePod x) { if (x) { return TOpts{x.Get()}; } return {}; } class TOptions : public TBoxedValue { TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { ui8 options = 0; if (args[0] && args[0].Get()) { options |= ui8(EOptions::AutoConvert); } if (args[1] && args[1].Get()) { options |= ui8(EOptions::Strict); } return TUnboxedValuePod(options); } public: static const TStringRef& Name() { static auto name = TStringRef::Of("Options"); return name; } static bool DeclareSignature( const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { Y_UNUSED(userType); if (Name() == name) { auto argsBuilder = builder.Args(2U); argsBuilder->Add>().Name(TStringRef::Of("AutoConvert")); argsBuilder->Add>().Name(TStringRef::Of("Strict")); builder.Returns(builder.Resource(OptionsResourceName)); builder.OptionalArgs(2U); if (!typesOnly) { builder.Implementation(new TOptions); } builder.IsStrict(); return true; } else { return false; } } }; using TConverterPtr = TUnboxedValuePod (*)(TUnboxedValuePod, const IValueBuilder*, const TSourcePosition& pos); template class TLazyConveterT : public TManagedBoxedValue { public: TLazyConveterT(TUnboxedValue&& original, const IValueBuilder* valueBuilder, const TSourcePosition& pos) 
// Continues TLazyConveterT: constructor initializer list, the nested TIterator, and the container interface.
//  * TIterator wraps an iterator of the original container. Skip() forwards. Next() forwards and, unless
//    NoSwap is set, replaces the produced value with Converter(value). NextPair() forwards and converts the
//    payload when NoSwap is set, otherwise the key.
//  * GetDictLength / GetListLength / HasFastListLength / HasDictItems / HasListItems / Contains forward to
//    Original unchanged.
//  * GetListIterator, GetDictIterator, GetKeysIterator and GetPayloadsIterator each wrap the corresponding
//    iterator of Original in a new TIterator.
//  * Lookup is declared at the end of this line; its body follows on the next physical line.
// NOTE(review): the TIterator template arguments are not visible in this text, so which of these iterators
// run with NoSwap cannot be told from here.
: Original(std::move(original)), ValueBuilder(valueBuilder), Pos_(pos) {} private: template class TIterator: public TManagedBoxedValue { public: TIterator(TUnboxedValue&& original, const IValueBuilder* valueBuilder, const TSourcePosition& pos) : Original(std::move(original)), ValueBuilder(valueBuilder), Pos_(pos) {} private: bool Skip() final { return Original.Skip(); } bool Next(TUnboxedValue& value) final { if (Original.Next(value)) { if constexpr (!NoSwap) { value = Converter(value.Release(), ValueBuilder, Pos_); } return true; } return false; } bool NextPair(TUnboxedValue& key, TUnboxedValue& payload) final { if (Original.NextPair(key, payload)) { if constexpr (NoSwap) { payload = Converter(payload.Release(), ValueBuilder, Pos_); } else { key = Converter(key.Release(), ValueBuilder, Pos_); } return true; } return false; } const TUnboxedValue Original; const IValueBuilder *const ValueBuilder; const TSourcePosition Pos_; }; ui64 GetDictLength() const final { return Original.GetDictLength(); } ui64 GetListLength() const final { return Original.GetListLength(); } bool HasFastListLength() const final { return Original.HasFastListLength(); } bool HasDictItems() const final { return Original.HasDictItems(); } bool HasListItems() const final { return Original.HasListItems(); } TUnboxedValue GetListIterator() const final { return TUnboxedValuePod(new TIterator(Original.GetListIterator(), ValueBuilder, Pos_)); } TUnboxedValue GetDictIterator() const final { return TUnboxedValuePod(new TIterator(Original.GetDictIterator(), ValueBuilder, Pos_)); } TUnboxedValue GetKeysIterator() const final { return TUnboxedValuePod(new TIterator(Original.GetKeysIterator(), ValueBuilder, Pos_)); } TUnboxedValue GetPayloadsIterator() const override { return TUnboxedValuePod(new TIterator(Original.GetPayloadsIterator(), ValueBuilder, Pos_)); } bool Contains(const TUnboxedValuePod& key) const final { return Original.Contains(key); } TUnboxedValue Lookup(const TUnboxedValuePod& key) const 
// Continues TLazyConveterT::Lookup: a hit in Original is passed through Converter and re-wrapped with
// MakeOptional(); a miss returns an empty value. IsSortedDict forwards to Original. The class ends here.
//
// ConvertToListImpl(x, valueBuilder, pos):
//  * empty x -> a new empty list.
//  * List node: when x is not boxed, control leaves the switch and the empty list is returned. With a
//    Converter and (Strict || AutoConvert) the result is a TLazyConveterT wrapper around x. With a Converter
//    otherwise, every element is converted eagerly (through GetElements() when available, else through the
//    list iterator), elements whose conversion yields an empty value are dropped, and an empty result again
//    leaves the switch. Without a Converter, x itself is returned.
//  * Attr node: recurses on the wrapped item (GetVariantItem()).
//  * any other node: in Strict mode the UDF terminates with "Cannot parse list from ..." when the
//    IsNodeType check fails; otherwise the empty list is returned.
// ConvertToDictImpl starts at the end of this line with the same outline; it also returns NewEmptyList()
// for an empty x. Its remaining branches are on the next physical line.
final { if (auto lookup = Original.Lookup(key)) { return Converter(lookup.Release().GetOptionalValue(), ValueBuilder, Pos_).MakeOptional(); } return {}; } bool IsSortedDict() const final { return Original.IsSortedDict(); } const TUnboxedValue Original; const IValueBuilder *const ValueBuilder; const TSourcePosition Pos_; }; template TUnboxedValuePod ConvertToListImpl(TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { if (!x) { return valueBuilder->NewEmptyList().Release(); } switch (GetNodeType(x)) { case ENodeType::List: if (!x.IsBoxed()) break; if constexpr (Converter != nullptr) { if constexpr (Strict || AutoConvert) { return TUnboxedValuePod(new TLazyConveterT(x, valueBuilder, pos)); } else { TSmallVec values; if (const auto elements = x.GetElements()) { const auto size = x.GetListLength(); values.reserve(size); for (ui32 i = 0U; i < size; ++i) { if (auto converted = Converter(elements[i], valueBuilder, pos)) { values.emplace_back(std::move(converted)); } } } else { const auto it = x.GetListIterator(); for (TUnboxedValue v; it.Next(v);) { if (auto converted = Converter(v.Release(), valueBuilder, pos)) { values.emplace_back(std::move(converted)); } } } if (values.empty()) { break; } return valueBuilder->NewList(values.data(), values.size()).Release(); } } return x; case ENodeType::Attr: return ConvertToListImpl(x.GetVariantItem().Release(), valueBuilder, pos); default: if constexpr (Strict) { if (!IsNodeType(x)) { UdfTerminate((TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Cannot parse list from " << TDebugPrinter(x)).c_str()); } } } return valueBuilder->NewEmptyList().Release(); } template TUnboxedValuePod ConvertToDictImpl(TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { if (!x) { return valueBuilder->NewEmptyList().Release(); } switch (GetNodeType(x)) { case ENodeType::Dict: if (!x.IsBoxed()) break; if constexpr (Converter != nullptr) { if constexpr (Strict || AutoConvert) { 
// Continues ConvertToDictImpl: under (Strict || AutoConvert) the dict is wrapped lazily in a TLazyConveterT.
// Otherwise, for a non-empty dict, payloads are converted eagerly into a TMapNode; pairs whose converted
// payload is empty are skipped, and if nothing is left control leaves the switch and NewEmptyList() is
// returned. Without a Converter (or for an empty dict on the eager path) x itself is returned. Attr nodes
// recurse on the wrapped item; any other node terminates in Strict mode ("Cannot parse dict from ...") when
// the IsNodeType check fails.
// NOTE(review): the eager dict path passes `payload` to Converter without Release(), whereas the eager
// iterator path of ConvertToListImpl passes `v.Release()` -- confirm the ownership convention is intended.
//
// LookupImpl(dict, key, valueBuilder, pos):
//  * Dict: for a boxed dict, dict.Lookup(key); a hit is run through Converter when one is given, otherwise
//    returned as is. A miss or an unboxed dict yields an empty value.
//  * List: for a boxed, non-empty list the key is parsed as i32; indices in [-size, size) are accepted and
//    negative ones are shifted by size. Anything else yields an empty value.
//  * Attr: recurses on the wrapped item. Any other node type yields an empty value.
// YPathImpl starts at the end of this line; its validation and traversal follow on the next physical line.
return TUnboxedValuePod(new TLazyConveterT(x, valueBuilder, pos)); } else if (const auto size = x.GetDictLength()) { TSmallVec> pairs; pairs.reserve(size); const auto it = x.GetDictIterator(); for (TUnboxedValue key, payload; it.NextPair(key, payload);) { if (auto converted = Converter(payload, valueBuilder, pos)) { pairs.emplace_back(std::move(key), std::move(converted)); } } if (pairs.empty()) { break; } return TUnboxedValuePod(IBoxedValuePtr(new TMapNode(pairs.data(), pairs.size()))); } } return x; case ENodeType::Attr: return ConvertToDictImpl(x.GetVariantItem().Release(), valueBuilder, pos); default: if constexpr (Strict) { if (!IsNodeType(x)) { UdfTerminate((TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Cannot parse dict from " << TDebugPrinter(x)).c_str()); } } } return valueBuilder->NewEmptyList().Release(); } template TUnboxedValuePod LookupImpl(TUnboxedValuePod dict, const TUnboxedValuePod key, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { switch (GetNodeType(dict)) { case ENodeType::Dict: if (dict.IsBoxed()) { if (auto payload = dict.Lookup(key)) { if constexpr (Converter != nullptr) { return Converter(payload.Release().GetOptionalValue(), valueBuilder, pos); } return payload.Release(); } } return {}; case ENodeType::List: if (dict.IsBoxed()) { if (const i32 size = dict.GetListLength()) { if (i32 index; TryFromString(key.AsStringRef(), index) && index < size && index >= -size) { if (index < 0) index += size; if constexpr (Converter != nullptr) { return Converter(dict.Lookup(TUnboxedValuePod(index)).Release(), valueBuilder, pos); } return dict.Lookup(TUnboxedValuePod(index)).Release(); } } } return {}; case ENodeType::Attr: return LookupImpl(dict.GetVariantItem().Release(), key, valueBuilder, pos); default: return {}; } } template TUnboxedValuePod YPathImpl(TUnboxedValuePod dict, const TUnboxedValuePod key, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { const std::string_view path = key.AsStringRef(); 
// Continues YPathImpl: the path must be at least two characters long, start with '/' and must not end with
// '/'; otherwise the UDF terminates with "Invalid YPath: '...'.". One or two leading slashes are skipped and
// the rest is split on '/'. For every token: "@" requires the `attr` flag computed for the current node
// (else the result is empty) and retags the node through SetNodeType; any other token first unwraps the node
// via GetVariantItem() when `attr` is set, then looks the token up through LookupImpl, using a substring of
// the original key value as the lookup key. An empty intermediate result ends the walk with an empty value.
// The final node is passed through Converter when one is given.
//
// ContainsImpl(dict, key, valueBuilder, pos):
//  * Attr: recurses on the wrapped item.
//  * Dict: dict.Contains(key) for a boxed dict, false otherwise.
//  * List: for a boxed, non-empty list whose key parses as i32, whether the index lies in [-size, size);
//    false in every other case.
//  * other nodes: terminates with "Can't check contains on scalar ..." when (Strict && !AutoConvert),
//    otherwise returns an empty value.
// GetLengthImpl starts at the end of this line and continues on the next physical line.
if (path.size() < 2U || path.front() != '/' || path.back() == '/') { UdfTerminate((::TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Invalid YPath: '" << path << "'.").data()); } for (const auto s : StringSplitter(path.substr(path[1U] == '/' ? 2U : 1U)).Split('/')) { const bool attr = IsNodeType(dict); if (const std::string_view subpath = s.Token(); subpath == "@") { if (attr) dict = SetNodeType(dict); else return {}; } else { if (attr) { dict = dict.GetVariantItem().Release(); } const auto subkey = valueBuilder->SubString(key, std::distance(path.begin(), subpath.begin()), subpath.size()); dict = LookupImpl(dict, subkey, valueBuilder, pos); } if (!dict) { return {}; } } if constexpr (Converter != nullptr) { return Converter(dict, valueBuilder, pos); } return dict; } template TUnboxedValuePod ContainsImpl(TUnboxedValuePod dict, TUnboxedValuePod key, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { switch (GetNodeType(dict)) { case ENodeType::Attr: return ContainsImpl(dict.GetVariantItem().Release(), key, valueBuilder, pos); case ENodeType::Dict: if (dict.IsBoxed()) return TUnboxedValuePod(dict.Contains(key)); else return TUnboxedValuePod(false); case ENodeType::List: if (dict.IsBoxed()) { if (const i32 size = dict.GetListLength()) { if (i32 index; TryFromString(key.AsStringRef(), index)) { return TUnboxedValuePod(index < size && index >= -size); } } } return TUnboxedValuePod(false); default: if constexpr (Strict && !AutoConvert) UdfTerminate((TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Can't check contains on scalar " << TDebugPrinter(dict)).c_str()); else return {}; } } template TUnboxedValuePod GetLengthImpl(TUnboxedValuePod dict, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { switch (GetNodeType(dict)) { case ENodeType::Attr: return GetLengthImpl(dict.GetVariantItem().Release(), valueBuilder, pos); case ENodeType::Dict: return TUnboxedValuePod(dict.IsBoxed() ? 
// Continues GetLengthImpl: Dict and List nodes report their length (ui64(0) when the value is not boxed);
// other nodes terminate with "Can't get container length from scalar ..." when (Strict && !AutoConvert) and
// otherwise yield an empty value. The file-local namespace closes right after it.
//
// Scalar conversion UDFs TConvertToBool, TConvertToInt64, TConvertToUint64, TConvertToDouble and the start of
// TConvertToString: each parses the optional options argument args[1] and selects one of four function
// pointers by options.Strict and options.AutoConvert, then calls it with (args[0], valueBuilder, GetPos()).
// NOTE(review): the template arguments that distinguish the four alternatives are not visible in this text,
// so all four read the same here -- confirm against the real file.
dict.GetDictLength() : ui64(0)); case ENodeType::List: return TUnboxedValuePod(dict.IsBoxed() ? dict.GetListLength() : ui64(0)); default: if constexpr (Strict && !AutoConvert) UdfTerminate((TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Can't get container length from scalar " << TDebugPrinter(dict)).c_str()); else return {}; } } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToBool, TOptional(TAutoMap, TOptional), 1) { if (const auto options = ParseOptions(args[1]); options.Strict) return (options.AutoConvert ? &ConvertToBool : &ConvertToBool)(args[0], valueBuilder, GetPos()); else return (options.AutoConvert ? &ConvertToBool : &ConvertToBool)(args[0], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToInt64, TOptional(TAutoMap, TOptional), 1) { if (const auto options = ParseOptions(args[1]); options.Strict) return (options.AutoConvert ? &ConvertToIntegral : &ConvertToIntegral)(args[0], valueBuilder, GetPos()); else return (options.AutoConvert ? &ConvertToIntegral : &ConvertToIntegral)(args[0], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToUint64, TOptional(TAutoMap, TOptional), 1) { if (const auto options = ParseOptions(args[1]); options.Strict) return (options.AutoConvert ? &ConvertToIntegral : &ConvertToIntegral)(args[0], valueBuilder, GetPos()); else return (options.AutoConvert ? &ConvertToIntegral : &ConvertToIntegral)(args[0], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToDouble, TOptional(TAutoMap, TOptional), 1) { if (const auto options = ParseOptions(args[1]); options.Strict) return (options.AutoConvert ? &ConvertToFloat : &ConvertToFloat)(args[0], valueBuilder, GetPos()); else return (options.AutoConvert ? &ConvertToFloat : &ConvertToFloat)(args[0], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToString, TOptional(TAutoMap, TOptional), 1) { if (const auto options = ParseOptions(args[1]); options.Strict) return (options.AutoConvert ? 
// Continues TConvertToString (its remaining dispatch alternatives), then the list conversion UDFs
// TConvertToList, TConvertToInt64List, TConvertToUint64List, TConvertToBoolList and the start of
// TConvertToDoubleList. Each parses the optional options argument args[1], selects one of four
// ConvertToListImpl function pointers by options.Strict and options.AutoConvert, and calls it with
// (args[0], valueBuilder, GetPos()).
// NOTE(review): the template arguments that pick the element converter and the Strict/AutoConvert variant are
// not visible in this text (only stray `>` remain) -- confirm against the real file.
&ConvertToString : &ConvertToString)(args[0], valueBuilder, GetPos()); else return (options.AutoConvert ? &ConvertToString : &ConvertToString)(args[0], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToList, TListType(TOptional, TOptional), 1) { if (const auto options = ParseOptions(args[1]); options.Strict) return (options.AutoConvert ? &ConvertToListImpl : &ConvertToListImpl)(args[0], valueBuilder, GetPos()); else return (options.AutoConvert ? &ConvertToListImpl : &ConvertToListImpl)(args[0], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToInt64List, TListType(TOptional, TOptional), 1) { if (const auto options = ParseOptions(args[1]); options.Strict) return (options.AutoConvert ? &ConvertToListImpl> : &ConvertToListImpl>)(args[0], valueBuilder, GetPos()); else return (options.AutoConvert ? &ConvertToListImpl> : &ConvertToListImpl>)(args[0], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToUint64List, TListType(TOptional, TOptional), 1) { if (const auto options = ParseOptions(args[1]); options.Strict) return (options.AutoConvert ? &ConvertToListImpl> : &ConvertToListImpl>)(args[0], valueBuilder, GetPos()); else return (options.AutoConvert ? &ConvertToListImpl> : &ConvertToListImpl>)(args[0], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToBoolList, TListType(TOptional, TOptional), 1) { if (const auto options = ParseOptions(args[1]); options.Strict) return (options.AutoConvert ? &ConvertToListImpl> : &ConvertToListImpl>)(args[0], valueBuilder, GetPos()); else return (options.AutoConvert ? &ConvertToListImpl> : &ConvertToListImpl>)(args[0], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToDoubleList, TListType(TOptional, TOptional), 1) { if (const auto options = ParseOptions(args[1]); options.Strict) return (options.AutoConvert ? &ConvertToListImpl> : &ConvertToListImpl>)(args[0], valueBuilder, GetPos()); else return (options.AutoConvert ? 
// Continues TConvertToDoubleList, then TConvertToStringList (both dispatch to ConvertToListImpl) and the dict
// conversion UDFs TConvertToDict, TConvertToInt64Dict, TConvertToUint64Dict and the start of
// TConvertToBoolDict (dispatching to ConvertToDictImpl). Each parses the optional options argument args[1],
// selects one of four function pointers by options.Strict and options.AutoConvert, and calls it with
// (args[0], valueBuilder, GetPos()).
// NOTE(review): the template arguments of the selected instantiations are not visible in this text.
&ConvertToListImpl> : &ConvertToListImpl>)(args[0], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToStringList, TListType(TOptional, TOptional), 1) { if (const auto options = ParseOptions(args[1]); options.Strict) return (options.AutoConvert ? &ConvertToListImpl> : &ConvertToListImpl>)(args[0], valueBuilder, GetPos()); else return (options.AutoConvert ? &ConvertToListImpl> : &ConvertToListImpl>)(args[0], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToDict, TDictType(TOptional, TOptional), 1) { if (const auto options = ParseOptions(args[1]); options.Strict) return (options.AutoConvert ? &ConvertToDictImpl : &ConvertToDictImpl)(args[0], valueBuilder, GetPos()); else return (options.AutoConvert ? &ConvertToDictImpl : &ConvertToDictImpl)(args[0], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToInt64Dict, TInt64DictType(TOptional, TOptional), 1) { if (const auto options = ParseOptions(args[1]); options.Strict) return (options.AutoConvert ? &ConvertToDictImpl> : &ConvertToDictImpl>)(args[0], valueBuilder, GetPos()); else return (options.AutoConvert ? &ConvertToDictImpl> : &ConvertToDictImpl>)(args[0], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToUint64Dict, TUint64DictType(TOptional, TOptional), 1) { if (const auto options = ParseOptions(args[1]); options.Strict) return (options.AutoConvert ? &ConvertToDictImpl> : &ConvertToDictImpl>)(args[0], valueBuilder, GetPos()); else return (options.AutoConvert ? &ConvertToDictImpl> : &ConvertToDictImpl>)(args[0], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToBoolDict, TBoolDictType(TOptional, TOptional), 1) { if (const auto options = ParseOptions(args[1]); options.Strict) return (options.AutoConvert ? &ConvertToDictImpl> : &ConvertToDictImpl>)(args[0], valueBuilder, GetPos()); else return (options.AutoConvert ? 
// Continues TConvertToBoolDict, then TConvertToDoubleDict and TConvertToStringDict: four-way dispatch to
// ConvertToDictImpl on the options parsed from args[1].
// TAttributes: returns args[0] unchanged when the IsNodeType check passes, otherwise a new empty list.
// TContains: options are parsed from args[2]; dispatches to ContainsImpl with (args[0], args[1], ...).
// TGetLength: options are parsed from args[1]; dispatches to GetLengthImpl (continues on the next line).
// NOTE(review): the template arguments of the selected instantiations are not visible in this text.
&ConvertToDictImpl> : &ConvertToDictImpl>)(args[0], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToDoubleDict, TDoubleDictType(TOptional, TOptional), 1) { if (const auto options = ParseOptions(args[1]); options.Strict) return (options.AutoConvert ? &ConvertToDictImpl> : &ConvertToDictImpl>)(args[0], valueBuilder, GetPos()); else return (options.AutoConvert ? &ConvertToDictImpl> : &ConvertToDictImpl>)(args[0], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToStringDict, TStringDictType(TOptional, TOptional), 1) { if (const auto options = ParseOptions(args[1]); options.Strict) return (options.AutoConvert ? &ConvertToDictImpl> : &ConvertToDictImpl>)(args[0], valueBuilder, GetPos()); else return (options.AutoConvert ? &ConvertToDictImpl> : &ConvertToDictImpl>)(args[0], valueBuilder, GetPos()); } SIMPLE_STRICT_UDF(TAttributes, TDictType(TAutoMap)) { const auto x = args[0]; if (IsNodeType(x)) { return x; } return valueBuilder->NewEmptyList(); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TContains, TOptional(TAutoMap, char*, TOptional), 1) { if (const auto options = ParseOptions(args[2]); options.Strict) return (options.AutoConvert ? &ContainsImpl : &ContainsImpl)(args[0], args[1], valueBuilder, GetPos()); else return (options.AutoConvert ? &ContainsImpl : &ContainsImpl)(args[0], args[1], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TGetLength, TOptional(TAutoMap, TOptional), 1) { if (const auto options = ParseOptions(args[1]); options.Strict) return (options.AutoConvert ? &GetLengthImpl : &GetLengthImpl)(args[0], valueBuilder, GetPos()); else return (options.AutoConvert ? 
// Continues TGetLength (its remaining dispatch alternatives).
// TLookup: calls LookupImpl(args[0], args[1], ...) directly; the options argument is not read.
// TLookupBool, TLookupInt64, TLookupUint64 and the start of TLookupDouble: options are parsed from args[2]
// and select one of four LookupImpl instantiations, parameterised by ConvertToBool, ConvertToIntegral or
// ConvertToFloat respectively, which is then called with (args[0], args[1], valueBuilder, GetPos()).
// NOTE(review): the Strict/AutoConvert template arguments of the converters are not visible in this text.
&GetLengthImpl : &GetLengthImpl)(args[0], valueBuilder, GetPos()); } SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TLookup, TOptional(TAutoMap, char*, TOptional), 1) { return LookupImpl(args[0], args[1], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupBool, TOptional(TAutoMap, char*, TOptional), 1) { if (const auto options = ParseOptions(args[2]); options.Strict) return (options.AutoConvert ? &LookupImpl<&ConvertToBool> : &LookupImpl<&ConvertToBool>)(args[0], args[1], valueBuilder, GetPos()); else return (options.AutoConvert ? &LookupImpl<&ConvertToBool> : &LookupImpl<&ConvertToBool>)(args[0], args[1], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupInt64, TOptional(TAutoMap, char*, TOptional), 1) { if (const auto options = ParseOptions(args[2]); options.Strict) return (options.AutoConvert ? &LookupImpl<&ConvertToIntegral> : &LookupImpl<&ConvertToIntegral>)(args[0], args[1], valueBuilder, GetPos()); else return (options.AutoConvert ? &LookupImpl<&ConvertToIntegral> : &LookupImpl<&ConvertToIntegral>)(args[0], args[1], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupUint64, TOptional(TAutoMap, char*, TOptional), 1) { if (const auto options = ParseOptions(args[2]); options.Strict) return (options.AutoConvert ? &LookupImpl<&ConvertToIntegral> : &LookupImpl<&ConvertToIntegral>)(args[0], args[1], valueBuilder, GetPos()); else return (options.AutoConvert ? &LookupImpl<&ConvertToIntegral> : &LookupImpl<&ConvertToIntegral>)(args[0], args[1], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupDouble, TOptional(TAutoMap, char*, TOptional), 1) { if (const auto options = ParseOptions(args[2]); options.Strict) return (options.AutoConvert ? &LookupImpl<&ConvertToFloat> : &LookupImpl<&ConvertToFloat>)(args[0], args[1], valueBuilder, GetPos()); else return (options.AutoConvert ? 
// Continues TLookupDouble, then TLookupString, TLookupList and TLookupDict: options are parsed from args[2]
// and select one of four LookupImpl instantiations, parameterised by ConvertToString, ConvertToListImpl or
// ConvertToDictImpl respectively, called with (args[0], args[1], valueBuilder, GetPos()).
// TYPath: calls YPathImpl(args[0], args[1], ...) directly; the options argument is not read.
// TYPathBool starts at the end of this line: same four-way dispatch, through YPathImpl with ConvertToBool.
// NOTE(review): the Strict/AutoConvert template arguments of the converters are not visible in this text.
&LookupImpl<&ConvertToFloat> : &LookupImpl<&ConvertToFloat>)(args[0], args[1], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupString, TOptional(TAutoMap, char*, TOptional), 1) { if (const auto options = ParseOptions(args[2]); options.Strict) return (options.AutoConvert ? &LookupImpl<&ConvertToString> : &LookupImpl<&ConvertToString>)(args[0], args[1], valueBuilder, GetPos()); else return (options.AutoConvert ? &LookupImpl<&ConvertToString> : &LookupImpl<&ConvertToString>)(args[0], args[1], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupList, TOptional>(TAutoMap, char*, TOptional), 1) { if (const auto options = ParseOptions(args[2]); options.Strict) return (options.AutoConvert ? &LookupImpl<&ConvertToListImpl> : &LookupImpl<&ConvertToListImpl>)(args[0], args[1], valueBuilder, GetPos()); else return (options.AutoConvert ? &LookupImpl<&ConvertToListImpl> : &LookupImpl<&ConvertToListImpl>)(args[0], args[1], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupDict, TOptional(TAutoMap, char*, TOptional), 1) { if (const auto options = ParseOptions(args[2]); options.Strict) return (options.AutoConvert ? &LookupImpl<&ConvertToDictImpl> : &LookupImpl<&ConvertToDictImpl>)(args[0], args[1], valueBuilder, GetPos()); else return (options.AutoConvert ? &LookupImpl<&ConvertToDictImpl> : &LookupImpl<&ConvertToDictImpl>)(args[0], args[1], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPath, TOptional(TAutoMap, char*, TOptional), 1) { return YPathImpl(args[0], args[1], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathBool, TOptional(TAutoMap, char*, TOptional), 1) { if (const auto options = ParseOptions(args[2]); options.Strict) return (options.AutoConvert ? &YPathImpl<&ConvertToBool> : &YPathImpl<&ConvertToBool>)(args[0], args[1], valueBuilder, GetPos()); else return (options.AutoConvert ? 
// Continues TYPathBool, then TYPathInt64, TYPathUint64, TYPathDouble and the start of TYPathString. Each
// parses the options from args[2] and selects one of four YPathImpl instantiations, parameterised by
// ConvertToBool, ConvertToIntegral, ConvertToFloat or ConvertToString respectively, which is then called
// with (args[0], args[1], valueBuilder, GetPos()).
// NOTE(review): the Strict/AutoConvert template arguments of the converters are not visible in this text.
&YPathImpl<&ConvertToBool> : &YPathImpl<&ConvertToBool>)(args[0], args[1], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathInt64, TOptional(TAutoMap, char*, TOptional), 1) { if (const auto options = ParseOptions(args[2]); options.Strict) return (options.AutoConvert ? &YPathImpl<&ConvertToIntegral> : &YPathImpl<&ConvertToIntegral>)(args[0], args[1], valueBuilder, GetPos()); else return (options.AutoConvert ? &YPathImpl<&ConvertToIntegral> : &YPathImpl<&ConvertToIntegral>)(args[0], args[1], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathUint64, TOptional(TAutoMap, char*, TOptional), 1) { if (const auto options = ParseOptions(args[2]); options.Strict) return (options.AutoConvert ? &YPathImpl<&ConvertToIntegral> : &YPathImpl<&ConvertToIntegral>)(args[0], args[1], valueBuilder, GetPos()); else return (options.AutoConvert ? &YPathImpl<&ConvertToIntegral> : &YPathImpl<&ConvertToIntegral>)(args[0], args[1], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathDouble, TOptional(TAutoMap, char*, TOptional), 1) { if (const auto options = ParseOptions(args[2]); options.Strict) return (options.AutoConvert ? &YPathImpl<&ConvertToFloat> : &YPathImpl<&ConvertToFloat>)(args[0], args[1], valueBuilder, GetPos()); else return (options.AutoConvert ? &YPathImpl<&ConvertToFloat> : &YPathImpl<&ConvertToFloat>)(args[0], args[1], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathString, TOptional(TAutoMap, char*, TOptional), 1) { if (const auto options = ParseOptions(args[2]); options.Strict) return (options.AutoConvert ? &YPathImpl<&ConvertToString> : &YPathImpl<&ConvertToString>)(args[0], args[1], valueBuilder, GetPos()); else return (options.AutoConvert ? 
// Continues TYPathString, then TYPathList and TYPathDict: four-way dispatch on the options parsed from
// args[2] to YPathImpl instantiations parameterised by ConvertToListImpl / ConvertToDictImpl.
// TSerialize, TSerializeText and TSerializePretty return a new string built from SerializeYsonDomToBinary,
// SerializeYsonDomToText and SerializeYsonDomToPrettyText of args[0] respectively.
// SkipMapEntity, EncodeUtf8 and WriteNanAsString hold the names "SkipMapEntity", "EncodeUtf8" and
// "WriteNanAsString".
// TSerializeJson: a function-try-block. It passes args[0] and args[2..4] (each defaulting to false) to
// SerializeJsonDom and returns the result as a new string. On std::exception it terminates with the callee
// position and e.what() when the options in args[1] request Strict, otherwise it returns an empty value.
// TWithAttributes starts at the end of this line; its signature and body follow on the next physical line.
&YPathImpl<&ConvertToString> : &YPathImpl<&ConvertToString>)(args[0], args[1], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathList, TOptional>(TAutoMap, char*, TOptional), 1) { if (const auto options = ParseOptions(args[2]); options.Strict) return (options.AutoConvert ? &YPathImpl<&ConvertToListImpl> : &YPathImpl<&ConvertToListImpl>)(args[0], args[1], valueBuilder, GetPos()); else return (options.AutoConvert ? &YPathImpl<&ConvertToListImpl> : &YPathImpl<&ConvertToListImpl>)(args[0], args[1], valueBuilder, GetPos()); } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathDict, TOptional(TAutoMap, char*, TOptional), 1) { if (const auto options = ParseOptions(args[2]); options.Strict) return (options.AutoConvert ? &YPathImpl<&ConvertToDictImpl> : &YPathImpl<&ConvertToDictImpl>)(args[0], args[1], valueBuilder, GetPos()); else return (options.AutoConvert ? &YPathImpl<&ConvertToDictImpl> : &YPathImpl<&ConvertToDictImpl>)(args[0], args[1], valueBuilder, GetPos()); } SIMPLE_STRICT_UDF(TSerialize, TYson(TAutoMap)) { return valueBuilder->NewString(SerializeYsonDomToBinary(args[0])); } SIMPLE_STRICT_UDF(TSerializeText, TYson(TAutoMap)) { return valueBuilder->NewString(SerializeYsonDomToText(args[0])); } SIMPLE_STRICT_UDF(TSerializePretty, TYson(TAutoMap)) { return valueBuilder->NewString(SerializeYsonDomToPrettyText(args[0])); } constexpr char SkipMapEntity[] = "SkipMapEntity"; constexpr char EncodeUtf8[] = "EncodeUtf8"; constexpr char WriteNanAsString[] = "WriteNanAsString"; SIMPLE_UDF_WITH_OPTIONAL_ARGS(TSerializeJson, TOptional(TAutoMap, TOptional, TNamedArg, TNamedArg, TNamedArg), 4) try { return valueBuilder->NewString(SerializeJsonDom(args[0], args[2].GetOrDefault(false), args[3].GetOrDefault(false), args[4].GetOrDefault(false))); } catch (const std::exception& e) { if (ParseOptions(args[1]).Strict) { UdfTerminate((::TStringBuilder() << valueBuilder->WithCalleePosition(GetPos()) << " " << e.what()).data()); } return {}; } SIMPLE_STRICT_UDF(TWithAttributes, 
TOptional(TAutoMap, TAutoMap)) { Y_UNUSED(valueBuilder); TUnboxedValue x = args[0]; auto y = args[1]; if (!IsNodeType(y)) { return {}; } if (IsNodeType(x)) { x = x.GetVariantItem(); } if (y.IsEmbedded()) { return x; } if (!y.IsBoxed()) { return {}; } // clone dict as attrnode if (const auto resource = y.GetResource()) { return SetNodeType(TUnboxedValuePod(new TAttrNode(std::move(x), static_cast(resource), y.GetDictLength()))); } else { TSmallVec> items; items.reserve(y.GetDictLength()); const auto it = y.GetDictIterator(); for (TUnboxedValue x, y; it.NextPair(x, y);) { items.emplace_back(std::move(x), std::move(y)); } if (items.empty()) { return x; } return SetNodeType(TUnboxedValuePod(new TAttrNode(std::move(x), items.data(), items.size()))); } } template TUnboxedValuePod IsTypeImpl(TUnboxedValuePod y) { if (IsNodeType(y)) { y = y.GetVariantItem().Release(); } return TUnboxedValuePod(IsNodeType(y)); } SIMPLE_STRICT_UDF(TIsString, bool(TAutoMap)) { Y_UNUSED(valueBuilder); return IsTypeImpl(*args); } SIMPLE_STRICT_UDF(TIsInt64, bool(TAutoMap)) { Y_UNUSED(valueBuilder); return IsTypeImpl(*args); } SIMPLE_STRICT_UDF(TIsUint64, bool(TAutoMap)) { Y_UNUSED(valueBuilder); return IsTypeImpl(*args); } SIMPLE_STRICT_UDF(TIsBool, bool(TAutoMap)) { Y_UNUSED(valueBuilder); return IsTypeImpl(*args); } SIMPLE_STRICT_UDF(TIsDouble, bool(TAutoMap)) { Y_UNUSED(valueBuilder); return IsTypeImpl(*args); } SIMPLE_STRICT_UDF(TIsList, bool(TAutoMap)) { Y_UNUSED(valueBuilder); return IsTypeImpl(*args); } SIMPLE_STRICT_UDF(TIsDict, bool(TAutoMap)) { Y_UNUSED(valueBuilder); return IsTypeImpl(*args); } SIMPLE_STRICT_UDF(TIsEntity, bool(TAutoMap)) { Y_UNUSED(valueBuilder); return IsTypeImpl(*args); } SIMPLE_STRICT_UDF(TEquals, bool(TAutoMap, TAutoMap)) { Y_UNUSED(valueBuilder); return TUnboxedValuePod(EquateDoms(args[0], args[1])); } SIMPLE_STRICT_UDF(TGetHash, ui64(TAutoMap)) { Y_UNUSED(valueBuilder); return TUnboxedValuePod(HashDom(args[0])); } namespace { class TBase: public TBoxedValue { 
// Continues TBase: the constructor stores the source position, the type helper and the type `shape`.
//
// CheckType(typeHelper, shape) walks a type description and returns nullptr when the type is acceptable, or
// a pointer to the first offending (sub)type otherwise:
//  * Null / Void / EmptyList / EmptyDict: accepted only when MoreTypesAllowed, otherwise `shape` is returned.
//  * Data: accepted for the listed data type ids, rejected for any other id.
//  * Optional / List: decided by the item type.
//  * Dict: the key must be a Data type with one of two accepted ids, and then the value type decides;
//    otherwise the dict type itself is returned.
//  * Tuple / Struct: elements / members are checked from the last index down to 0 and the first offending
//    one is returned; an empty tuple / struct is accepted. (The Struct branch continues on the next line.)
// NOTE(review): the concrete data type ids and the template arguments of the recursive CheckType calls are
// not visible in this text (`TDataType::Id` appears repeated) -- confirm against the real file.
public: typedef bool TTypeAwareMarker; TBase(TSourcePosition pos, const ITypeInfoHelper::TPtr typeHelper, const TType* shape) : Pos_(pos), TypeHelper_(typeHelper), Shape_(shape) {} protected: template static const TType* CheckType(const ITypeInfoHelper::TPtr typeHelper, const TType* shape) { switch (const auto kind = typeHelper->GetTypeKind(shape)) { case ETypeKind::Null: case ETypeKind::Void: case ETypeKind::EmptyList: case ETypeKind::EmptyDict: return MoreTypesAllowed ? nullptr : shape; case ETypeKind::Data: switch (TDataTypeInspector(*typeHelper, shape).GetTypeId()) { case TDataType::Id: case TDataType::Id: case TDataType::Id: case TDataType::Id: case TDataType::Id: case TDataType::Id: case TDataType::Id: case TDataType::Id: case TDataType::Id: case TDataType::Id: case TDataType::Id: case TDataType::Id: case TDataType::Id: case TDataType::Id: case TDataType::Id: return nullptr; default: return shape; } case ETypeKind::Optional: return CheckType(typeHelper, TOptionalTypeInspector(*typeHelper, shape).GetItemType()); case ETypeKind::List: return CheckType(typeHelper, TListTypeInspector(*typeHelper, shape).GetItemType()); case ETypeKind::Dict: { const auto dictTypeInspector = TDictTypeInspector(*typeHelper, shape); if (const auto keyType = dictTypeInspector.GetKeyType(); ETypeKind::Data == typeHelper->GetTypeKind(keyType)) if (const auto keyId = TDataTypeInspector(*typeHelper, keyType).GetTypeId(); keyId == TDataType::Id || keyId == TDataType::Id) return CheckType(typeHelper, dictTypeInspector.GetValueType()); return shape; } case ETypeKind::Tuple: if (const auto tupleTypeInspector = TTupleTypeInspector(*typeHelper, shape); auto count = tupleTypeInspector.GetElementsCount()) do if (const auto bad = CheckType(typeHelper, tupleTypeInspector.GetElementType(--count))) return bad; while (count); return nullptr; case ETypeKind::Struct: if (const auto structTypeInspector = TStructTypeInspector(*typeHelper, shape); auto count = structTypeInspector.GetMembersCount()) do if 
// Continues TBase::CheckType: finishes the Struct branch; a Variant is decided by its underlying type when
// MoreTypesAllowed and rejected otherwise; a Resource is accepted only when its tag equals NodeResourceName;
// every other kind is rejected by returning `shape`. The TBase data members follow.
//
// TFrom (callable "From"): Run builds a DOM from the single argument through MakeDom, using the stored type
// helper and shape. DeclareSignature returns false for any other name. For "From" it always returns true and
// reports problems through builder.SetError: a missing user type, a user type that is not a tuple with at
// least one element, a first element that is not a tuple, or an argument count other than one. The single
// argument type is then validated with CheckType (continues on the next physical line).
//
// The AUTOGENERATED_FALLTHROUGH_FIXME marker is a block comment on purpose: on this collapsed physical line a
// `//` comment would extend to the end of the line and comment out the `default:` label, the TBase members
// and most of TFrom.
(const auto bad = CheckType(typeHelper, structTypeInspector.GetMemberType(--count))) return bad; while (count); return nullptr; case ETypeKind::Variant: if constexpr (MoreTypesAllowed) return CheckType(typeHelper, TVariantTypeInspector(*typeHelper, shape).GetUnderlyingType()); else return shape; case ETypeKind::Resource: if (const auto inspector = TResourceTypeInspector(*typeHelper, shape); TStringBuf(inspector.GetTag()) == NodeResourceName) return nullptr; [[fallthrough]]; /* AUTOGENERATED_FALLTHROUGH_FIXME */ default: return shape; } } const TSourcePosition Pos_; const ITypeInfoHelper::TPtr TypeHelper_; const TType *const Shape_; }; class TFrom: public TBase { TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { return MakeDom(TypeHelper_.Get(), Shape_, *args, valueBuilder); } public: static const TStringRef& Name() { static auto name = TStringRef::Of("From"); return name; } TFrom(TSourcePosition pos, const ITypeInfoHelper::TPtr typeHelper, const TType* shape) : TBase(pos, typeHelper, shape) {} static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { if (Name() == name) { if (!userType) { builder.SetError("Missing user type."); return true; } builder.UserType(userType); const auto typeHelper = builder.TypeInfoHelper(); const auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); if (!userTypeInspector || userTypeInspector.GetElementsCount() < 1) { builder.SetError("Invalid user type."); return true; } const auto argsTypeTuple = userTypeInspector.GetElementType(0); const auto argsTypeInspector = TTupleTypeInspector(*typeHelper, argsTypeTuple); if (!argsTypeInspector) { builder.SetError("Invalid user type - expected tuple."); return true; } if (argsTypeInspector.GetElementsCount() != 1) { builder.SetError("Expected single argument."); return true; } const auto inputType = argsTypeInspector.GetElementType(0); if (const auto badType = 
// Continues TFrom::DeclareSignature: when CheckType reports an offending type, the error "Impossible to
// create DOM from incompatible with Yson type: <input type>" is set, followed by " Incompatible type: <bad
// type>" when the offending type differs from the input type. Otherwise the callable takes one argument of
// the input type and returns the node resource, is marked strict, and (unless typesOnly) gets a TFrom
// implementation bound to the source position, type helper and input type.
//
// TConvert (callable "ConvertTo"): Run parses the options from args[1] and selects one of four PeelDom
// function pointers by options.Strict and options.AutoConvert, calling it with the stored type helper and
// shape, args[0], the value builder and the stored position. DeclareSignature returns false for any other
// name. For "ConvertTo" it declares one optional argument and requires a user type that is a tuple with at
// least three elements whose first element is itself a tuple (continues on the next physical line).
// NOTE(review): the template arguments of PeelDom and CheckType are not visible in this text.
CheckType(typeHelper, inputType)) { ::TStringBuilder sb; sb << "Impossible to create DOM from incompatible with Yson type: "; TTypePrinter(*typeHelper, inputType).Out(sb.Out); if (badType != inputType) { sb << " Incompatible type: "; TTypePrinter(*typeHelper, badType).Out(sb.Out); } builder.SetError(sb); return true; } builder.Args()->Add(inputType).Done().Returns(builder.Resource(NodeResourceName)); if (!typesOnly) { builder.Implementation(new TFrom(builder.GetSourcePosition(), typeHelper, inputType)); } builder.IsStrict(); return true; } else { return false; } } }; class TConvert: public TBase { TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { if (const auto options = ParseOptions(args[1]); options.Strict) return (options.AutoConvert ? &PeelDom : &PeelDom)(TypeHelper_.Get(), Shape_, args[0], valueBuilder, Pos_); else return (options.AutoConvert ? &PeelDom : &PeelDom)(TypeHelper_.Get(), Shape_, args[0], valueBuilder, Pos_); } public: TConvert(TSourcePosition pos, const ITypeInfoHelper::TPtr typeHelper, const TType* shape) : TBase(pos, typeHelper, shape) {} static const TStringRef& Name() { static auto name = TStringRef::Of("ConvertTo"); return name; } static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { if (Name() == name) { const auto optionsType = builder.Optional()->Item(builder.Resource(OptionsResourceName)).Build(); builder.OptionalArgs(1); if (!userType) { builder.SetError("Missing user type."); return true; } builder.UserType(userType); const auto typeHelper = builder.TypeInfoHelper(); const auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); if (!userTypeInspector || userTypeInspector.GetElementsCount() < 3) { builder.SetError("Invalid user type."); return true; } const auto argsTypeTuple = userTypeInspector.GetElementType(0); const auto argsTypeInspector = TTupleTypeInspector(*typeHelper, argsTypeTuple); if 
// Continuation of TConvert::DeclareSignature: element 0 of the user type must be a tuple
// with one or two argument types. Element 2 of the user type is taken as resultType and is
// rejected with an error when CheckType returns a non-null type for it. The declared
// signature is (Resource(NodeResourceName) flagged AutoMap, optional options) -> resultType,
// and the implementation is attached unless typesOnly is set.
//
// class TParse - parser UDF; the class is a template whose parameter list is missing in
// this text (`template class TParse`) - NOTE(review): confirm upstream.
//   Members: Pos_ (call position) and StrictType_ (set from the constructor argument).
//   Run() and Name() are only declared inside the class; their definitions appear after
//   the class as explicit specializations.
//   DeclareSignature(): returns false when `name` does not match Name(). typeId starts
//   from a default, and when a user type is supplied it must be a tuple with at least one
//   element ("Missing or invalid user type." otherwise). Validation continues on the next
//   physical line.
(!argsTypeInspector) { builder.SetError("Invalid user type - expected tuple."); return true; } if (const auto argsCount = argsTypeInspector.GetElementsCount(); argsCount < 1 || argsCount > 2) { ::TStringBuilder sb; sb << "Invalid user type - expected one or two arguments, got: " << argsCount; builder.SetError(sb); return true; } const auto resultType = userTypeInspector.GetElementType(2); if (const auto badType = CheckType(typeHelper, resultType)) { ::TStringBuilder sb; sb << "Impossible to convert DOM to incompatible with Yson type: "; TTypePrinter(*typeHelper, resultType).Out(sb.Out); if (badType != resultType) { sb << " Incompatible type: "; TTypePrinter(*typeHelper, badType).Out(sb.Out); } builder.SetError(sb); return true; } builder.Args()->Add(builder.Resource(NodeResourceName)).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionsType); builder.Returns(resultType); if (!typesOnly) { builder.Implementation(new TConvert(builder.GetSourcePosition(), typeHelper, resultType)); } return true; } else { return false; } } }; template class TParse: public TBoxedValue { public: typedef bool TTypeAwareMarker; private: const TSourcePosition Pos_; const bool StrictType_; TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final; public: TParse(TSourcePosition pos, bool strictType) : Pos_(pos), StrictType_(strictType) {} static const TStringRef& Name(); static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { if (Name() == name) { auto typeId = TDataType::Id; if (userType) { const auto typeHelper = builder.TypeInfoHelper(); const auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); if (!userTypeInspector || userTypeInspector.GetElementsCount() < 1) { builder.SetError("Missing or invalid user type."); return true; } const auto argsTypeTuple = userTypeInspector.GetElementType(0); const auto argsTypeInspector = TTupleTypeInspector(*typeHelper, 
// Continuation of TParse::DeclareSignature: element 0 of the user type must be a tuple of
// one or two argument types. The first argument type is unwrapped from Optional when it is
// one; typeId is then set either in the resource branch or from the data type's id.
// After that, one optional Resource(OptionsResourceName) argument is declared and a switch
// on typeId selects the signature: the first case returns a plain node resource and calls
// builder.IsStrict(); the second case and the default return Optional(node resource).
// The implementation is created with strictType = (TDataType::Id == typeId).
// NOTE(review): `TDataType::Id` and `Add>()` lack their template arguments in this text,
// so which data type each case refers to cannot be read from here - confirm upstream.
//
// First TParse::Run specialization (function-try-block): returns
// TryParseYsonDom(args[0].AsStringRef(), valueBuilder). On std::exception it calls
// UdfTerminate with the callee position and e.what() when StrictType_ or the Strict option
// from args[1] is set; otherwise it returns an empty TUnboxedValuePod.
// The header of the next Run specialization starts at the end of this physical line.
argsTypeTuple); if (!argsTypeInspector) { builder.SetError("Invalid user type - expected tuple."); return true; } const auto argsCount = argsTypeInspector.GetElementsCount(); if (argsCount < 1 || argsCount > 2) { ::TStringBuilder sb; sb << "Invalid user type - expected one or two arguments, got: " << argsCount; builder.SetError(sb); return true; } const auto inputType = argsTypeInspector.GetElementType(0); auto dataType = inputType; if (const auto optInspector = TOptionalTypeInspector(*typeHelper, inputType)) { dataType = optInspector.GetItemType(); } if (const auto resInspector = TResourceTypeInspector(*typeHelper, dataType)) { typeId = TDataType::Id; } else { const auto dataInspector = TDataTypeInspector(*typeHelper, dataType); typeId = dataInspector.GetTypeId(); } builder.UserType(userType); } const auto optionsType = builder.Optional()->Item(builder.Resource(OptionsResourceName)).Build(); builder.OptionalArgs(1); switch (typeId) { case TDataType::Id: builder.Args()->Add>().Add(optionsType).Done().Returns(builder.Resource(NodeResourceName)); builder.IsStrict(); break; case TDataType::Id: builder.Args()->Add>().Add(optionsType).Done().Returns(builder.Optional()->Item(builder.Resource(NodeResourceName)).Build()); break; default: builder.Args()->Add>().Add(optionsType).Done().Returns(builder.Optional()->Item(builder.Resource(NodeResourceName)).Build()); break; } if (!typesOnly) { builder.Implementation(new TParse(builder.GetSourcePosition(), TDataType::Id == typeId)); } return true; } else { return false; } } }; template<> TUnboxedValue TParse::Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const try { return TryParseYsonDom(args[0].AsStringRef(), valueBuilder); } catch (const std::exception& e) { if (StrictType_ || ParseOptions(args[1]).Strict) { UdfTerminate((::TStringBuilder() << valueBuilder->WithCalleePosition(Pos_) << " " << e.what()).data()); } return TUnboxedValuePod(); } template<> TUnboxedValue TParse::Run(const IValueBuilder* 
// Body of the second TParse::Run specialization, followed by a third one. Both use the
// same function-try-block error handling: on std::exception they call UdfTerminate when
// StrictType_ or the Strict option from args[1] is set, and otherwise return an empty
// TUnboxedValuePod. The second calls TryParseJsonDom(args[0].AsStringRef(), valueBuilder);
// the third passes an additional `true` argument.
// Three TParse::Name() specializations then return "Parse", "ParseJson" and
// "ParseJsonDecodeUtf8" respectively.
// NOTE(review): the specializations are spelled identically (`TParse::Run`, `TParse::Name`)
// because their template arguments are missing in this text - confirm upstream.
// The lone `}` after them presumably closes the anonymous namespace opened at the top of
// the file. SIMPLE_MODULE then lists the UDF classes that make up TYson2Module.
// NOTE(review): because newlines were collapsed, the SIMPLE_MODULE invocation on this line
// is lexically inside the preceding `// TODO:` comment.
valueBuilder, const TUnboxedValuePod* args) const try { return TryParseJsonDom(args[0].AsStringRef(), valueBuilder); } catch (const std::exception& e) { if (StrictType_ || ParseOptions(args[1]).Strict) { UdfTerminate((::TStringBuilder() << valueBuilder->WithCalleePosition(Pos_) << " " << e.what()).data()); } return TUnboxedValuePod(); } template<> TUnboxedValue TParse::Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const try { return TryParseJsonDom(args[0].AsStringRef(), valueBuilder, true); } catch (const std::exception& e) { if (StrictType_ || ParseOptions(args[1]).Strict) { UdfTerminate((::TStringBuilder() << valueBuilder->WithCalleePosition(Pos_) << " " << e.what()).data()); } return TUnboxedValuePod(); } template<> const TStringRef& TParse::Name() { static auto yson = TStringRef::Of("Parse"); return yson; } template<> const TStringRef& TParse::Name() { static auto yson = TStringRef::Of("ParseJson"); return yson; } template<> const TStringRef& TParse::Name() { static auto yson = TStringRef::Of("ParseJsonDecodeUtf8"); return yson; } } // TODO: optimizer that marks UDFs as strict if Yson::Options(false as Strict) is given SIMPLE_MODULE(TYson2Module, TOptions, TParse, TParse, TParse, TConvert, TConvertToBool, TConvertToInt64, TConvertToUint64, TConvertToDouble, TConvertToString, TConvertToList, TConvertToBoolList, TConvertToInt64List, TConvertToUint64List, TConvertToDoubleList, TConvertToStringList, TConvertToDict, TConvertToBoolDict, TConvertToInt64Dict, TConvertToUint64Dict, TConvertToDoubleDict, TConvertToStringDict, TAttributes, TContains, TLookup, TLookupBool, TLookupInt64, TLookupUint64, TLookupDouble, TLookupString, TLookupList, TLookupDict, TYPath, TYPathBool, TYPathInt64, TYPathUint64, TYPathDouble, TYPathString, TYPathList, TYPathDict, TSerialize, TSerializeText, TSerializePretty, TSerializeJson, TWithAttributes, TIsString, TIsInt64, TIsUint64, TIsBool, TIsDouble, TIsList, TIsDict, TIsEntity, TFrom, TGetLength, TEquals, TGetHash ); 
// Hands TYson2Module (the module named in the SIMPLE_MODULE invocation) to the
// REGISTER_MODULES macro. Presumably this is what exports the module to the UDF host -
// NOTE(review): confirm against the macro's definition in the UDF helper headers.
REGISTER_MODULES(TYson2Module);