Browse Source

Implemented pg_proc/pg_aggregate/pg_language tables + small fixes (#2432)

Vitaly Stoyan 1 year ago
parent
commit
0cda3de632

+ 76 - 3
ydb/library/yql/parser/pg_catalog/catalog.cpp

@@ -44,6 +44,8 @@ using TAmProcs = THashMap<std::tuple<ui32, ui32, ui32, ui32>, TAmProcDesc>;
 
 using TConversions = THashMap<std::pair<TString, TString>, TConversionDesc>;
 
+using TLanguages = THashMap<ui32, TLanguageDesc>;
+
 bool IsCompatibleTo(ui32 actualTypeId, ui32 expectedTypeId, const TTypes& types) {
     if (actualTypeId == expectedTypeId) {
         return true;
@@ -390,6 +392,8 @@ public:
         } else if (key == "prolang") {
             if (value != "c") {
                 IsSupported = false;
+            } else {
+                LastProc.Lang = LangC;
             }
         } else if (key == "proargtypes") {
             TVector<TString> strArgs;
@@ -767,9 +771,7 @@ public:
     void OnFinish() override {
         if (IsSupported) {
             if (FillSupported()) {
-                auto id = Aggregations.size() + 1;
-                LastAggregation.InternalId = id;
-                Aggregations[id] = LastAggregation;
+                Aggregations[LastAggregation.AggId] = LastAggregation;
             }
         }
 
@@ -840,6 +842,22 @@ public:
         }
 
         Y_ENSURE(!LastAggregation.Name.empty());
+        auto funcIdsPtr = ProcByName.FindPtr(LastAggregation.Name);
+        Y_ENSURE(funcIdsPtr);
+        if (funcIdsPtr->size() == 1) {
+            LastAggregation.AggId = funcIdsPtr->front();
+        } else {
+            for (const auto id : *funcIdsPtr) {
+                auto procPtr = Procs.FindPtr(id);
+                Y_ENSURE(procPtr);
+                if (procPtr->ArgTypes == LastAggregation.ArgTypes) {
+                    LastAggregation.AggId = id;
+                    break;
+                }
+            }
+        }
+
+        Y_ENSURE(LastAggregation.AggId);
         if (!ResolveFunc(LastFinalFunc, LastAggregation.FinalFuncId, 1)) {
             return false;
         }
@@ -1250,6 +1268,33 @@ private:
     TConversionDesc LastConversion;
 };
 
+class TLanguagesParser : public TParser { // Builds TLanguageDesc values from key/value callbacks and stores them by oid.
+public:
+    TLanguagesParser(TLanguages& languages) // `languages` is kept by reference and filled in place.
+        : Languages(languages)
+    {}
+
+    void OnKey(const TString& key, const TString& value) override { // Picks up oid / lanname / descr; every other key is ignored.
+        if (key == "oid") {
+            LastLanguage.LangId = FromString<ui32>(value);
+        } else if (key == "lanname") {
+            LastLanguage.Name = value;
+        } else if (key == "descr") {
+            LastLanguage.Descr = value;
+        }
+    }
+
+    void OnFinish() override { // Presumably called by TParser at the end of each record - TODO confirm against TParser.
+        Languages[LastLanguage.LangId] = LastLanguage; // NOTE(review): no validation here; a record without `oid` is stored under key 0.
+        LastLanguage = TLanguageDesc(); // Reset so fields do not carry over into the next record.
+    }
+
+private:
+    TLanguages& Languages; // Destination map (language oid -> descriptor), referenced, not copied.
+
+    TLanguageDesc LastLanguage; // Descriptor currently being assembled.
+};
+
 TOperators ParseOperators(const TString& dat, const THashMap<TString, ui32>& typeByName,
     const TTypes& types, const THashMap<TString, TVector<ui32>>& procByName, const TProcs& procs) {
     TOperators ret;
@@ -1335,6 +1380,13 @@ TAms ParseAms(const TString& dat) {
     return ret;
 }
 
+TLanguages ParseLanguages(const TString& dat) { // Parses catalog text (the caller passes the pg_language.dat resource) into a map keyed by language oid.
+    TLanguages ret;
+    TLanguagesParser parser(ret); // The parser writes directly into `ret`.
+    parser.Do(dat);
+    return ret;
+}
+
 TNamespaces FillNamespaces() {
     const ui32 PgInformationSchemaNamepace = 1;
     const ui32 PgCatalogNamepace = 11;
@@ -1454,6 +1506,8 @@ struct TCatalog {
         Y_ENSURE(NResource::FindExact("pg_conversion.dat", &conversionData));
         TString amData;
         Y_ENSURE(NResource::FindExact("pg_am.dat", &amData));
+        TString languagesData;
+        Y_ENSURE(NResource::FindExact("pg_language.dat", &languagesData));
         THashMap<ui32, TLazyTypeInfo> lazyTypeInfos;
         Types = ParseTypes(typeData, lazyTypeInfos);
         for (const auto& [k, v] : Types) {
@@ -1609,6 +1663,7 @@ struct TCatalog {
         }
 
         Conversions = ParseConversions(conversionData, ProcByName);
+        Languages = ParseLanguages(languagesData);
     }
 
     static const TCatalog& Instance() {
@@ -1626,6 +1681,7 @@ struct TCatalog {
     TAmOps AmOps;
     TAmProcs AmProcs;
     TConversions Conversions;
+    TLanguages Languages;
     THashMap<TString, TVector<ui32>> ProcByName;
     THashMap<TString, ui32> TypeByName;
     THashMap<std::pair<ui32, ui32>, ui32> CastsByDir;
@@ -2799,6 +2855,23 @@ bool IsCompatibleTo(ui32 actualType, ui32 expectedType) {
     return IsCompatibleTo(actualType, expectedType, catalog.Types);
 }
 
+const TLanguageDesc& LookupLanguage(ui32 langId) { // Returns the catalog entry for `langId`; throws yexception when there is none.
+    const auto& catalog = TCatalog::Instance();
+    auto langPtr = catalog.Languages.FindPtr(langId);
+    if (!langPtr) {
+        throw yexception() << "No such lang: " << langId;
+    }
+
+    return *langPtr; // The reference points into catalog.Languages.
+}
+
+void EnumLanguages(std::function<void(ui32, const TLanguageDesc&)> f) { // Calls `f(oid, descriptor)` once for every language stored in the catalog.
+    const auto& catalog = TCatalog::Instance();
+    for (const auto& x : catalog.Languages) { // Languages is a THashMap, so callers should not rely on visiting order.
+        f(x.first, x.second);
+    }
+}
+
 const TVector<TTableInfo>& GetStaticTables() {
     const auto& catalog = TCatalog::Instance();
     return catalog.StaticTables;

+ 23 - 9
ydb/library/yql/parser/pg_catalog/catalog.h

@@ -42,12 +42,16 @@ struct TOperDesc {
     ui32 ProcId = 0;
 };
 
-enum EProcKind {
-    Function,
-    Aggregate,
-    Window
+enum class EProcKind : char { // The enumerator values are the characters emitted as-is for the pg_proc `prokind` column.
+    Function = 'f',
+    Aggregate = 'a',
+    Window = 'w'
 };
 
+constexpr ui32 LangInternal = 12; // oid of the 'internal' entry in pg_language.dat
+constexpr ui32 LangC = 13; // oid of the 'c' entry in pg_language.dat
+constexpr ui32 LangSQL = 14; // oid of the 'sql' entry in pg_language.dat
+
 struct TProcDesc {
     ui32 ProcId = 0;
     TString Name;
@@ -60,6 +64,7 @@ struct TProcDesc {
     bool ReturnSet = false;
     TVector<TString> OutputArgNames;
     TVector<ui32> OutputArgTypes;
+    ui32 Lang = LangInternal;
 };
 
 // Copied from pg_collation_d.h
@@ -139,14 +144,14 @@ struct TCastDesc {
     ECoercionCode CoercionCode = ECoercionCode::Unknown;
 };
 
-enum class EAggKind {
-    Normal,
-    OrderedSet,
-    Hypothetical
+enum class EAggKind : char { // The enumerator values are the characters emitted as-is for the pg_aggregate `aggkind` column.
+    Normal = 'n',
+    OrderedSet = 'o',
+    Hypothetical = 'h'
 };
 
 struct TAggregateDesc {
-    ui32 InternalId = 0;
+    ui32 AggId = 0;
     TString Name;
     TVector<ui32> ArgTypes;
     EAggKind Kind = EAggKind::Normal;
@@ -233,6 +238,12 @@ struct TConversionDesc {
     ui32 ProcId = 0;
 };
 
+struct TLanguageDesc { // One language entry as held in the catalog's language map.
+    ui32 LangId = 0; // Parsed from the `oid` key; stays 0 if that key never appears.
+    TString Name; // Parsed from the `lanname` key.
+    TString Descr; // Parsed from the `descr` key.
+};
+
 const TProcDesc& LookupProc(const TString& name, const TVector<ui32>& argTypeIds);
 const TProcDesc& LookupProc(ui32 procId, const TVector<ui32>& argTypeIds);
 const TProcDesc& LookupProc(ui32 procId);
@@ -281,6 +292,9 @@ const TAmProcDesc& LookupAmProc(ui32 familyId, ui32 num, ui32 leftType, ui32 rig
 bool HasConversion(const TString& from, const TString& to);
 const TConversionDesc& LookupConversion(const TString& from, const TString& to);
 
+const TLanguageDesc& LookupLanguage(ui32 langId); // Throws yexception when `langId` is not present in the catalog.
+void EnumLanguages(std::function<void(ui32, const TLanguageDesc&)> f); // Invokes `f(oid, descriptor)` once per catalog language.
+
 bool IsCompatibleTo(ui32 actualType, ui32 expectedType);
 bool IsCoercible(ui32 fromTypeId, ui32 toTypeId, ECoercionCode coercionType);
 

+ 1 - 0
ydb/library/yql/parser/pg_catalog/ya.make

@@ -11,6 +11,7 @@ RESOURCE(../pg_wrapper/postgresql/src/include/catalog/pg_amproc.dat pg_amproc.da
 RESOURCE(../pg_wrapper/postgresql/src/include/catalog/pg_amop.dat pg_amop.dat)
 RESOURCE(../pg_wrapper/postgresql/src/include/catalog/pg_am.dat pg_am.dat)
 RESOURCE(../pg_wrapper/postgresql/src/include/catalog/pg_conversion.dat pg_conversion.dat)
+RESOURCE(../pg_wrapper/postgresql/src/include/catalog/pg_language.dat pg_language.dat)
 
 SRCS(
     catalog.cpp

+ 74 - 1
ydb/library/yql/parser/pg_wrapper/comp_factory.cpp

@@ -424,7 +424,35 @@ public:
                 };
 
                 ApplyFillers(AllPgClassFillers, Y_ARRAY_SIZE(AllPgClassFillers), PgClassFillers_);
-            }
+            } else if (Table_ == "pg_proc") {
+                static const std::pair<const char*, TPgProcFiller> AllPgProcFillers[] = {
+                    {"oid", [](const NPg::TProcDesc& desc) { return ScalarDatumToPod(ObjectIdGetDatum(desc.ProcId)); }},
+                    {"proname", [](const NPg::TProcDesc& desc) { return PointerDatumToPod((Datum)MakeFixedString(desc.Name, NAMEDATALEN)); }},
+                    {"pronamespace", [](const NPg::TProcDesc&) { return ScalarDatumToPod(ObjectIdGetDatum(PG_CATALOG_NAMESPACE)); }},
+                    {"proowner", [](const NPg::TProcDesc&) { return ScalarDatumToPod(ObjectIdGetDatum(1)); }},
+                    {"prorettype", [](const NPg::TProcDesc& desc) { return ScalarDatumToPod(ObjectIdGetDatum(desc.ResultType)); }},
+                    {"prolang", [](const NPg::TProcDesc& desc) { return ScalarDatumToPod(ObjectIdGetDatum(desc.Lang)); }},
+                    {"prokind", [](const NPg::TProcDesc& desc) { return ScalarDatumToPod(CharGetDatum(desc.Kind)); }},
+                };
+
+                ApplyFillers(AllPgProcFillers, Y_ARRAY_SIZE(AllPgProcFillers), PgProcFillers_);
+            } else if (Table_ == "pg_aggregate") {
+                static const std::pair<const char*, TPgAggregateFiller> AllPgAggregateFillers[] = {
+                    {"aggfnoid", [](const NPg::TAggregateDesc& desc) { return ScalarDatumToPod(ObjectIdGetDatum(desc.AggId)); }},
+                    {"aggkind", [](const NPg::TAggregateDesc& desc) { return ScalarDatumToPod(CharGetDatum(desc.Kind)); }},
+                    {"aggtranstype", [](const NPg::TAggregateDesc& desc) { return ScalarDatumToPod(ObjectIdGetDatum(desc.TransTypeId)); }},
+                };
+
+                ApplyFillers(AllPgAggregateFillers, Y_ARRAY_SIZE(AllPgAggregateFillers), PgAggregateFillers_);
+            } else if (Table_ == "pg_language") {
+                static const std::pair<const char*, TPgLanguageFiller> AllPgLanguageFillers[] = {
+                    {"oid", [](const NPg::TLanguageDesc& desc) { return ScalarDatumToPod(ObjectIdGetDatum(desc.LangId)); }},
+                    {"lanname", [](const NPg::TLanguageDesc& desc) { return PointerDatumToPod((Datum)MakeFixedString(desc.Name, NAMEDATALEN)); }},
+                    {"lanowner", [](const NPg::TLanguageDesc&) { return ScalarDatumToPod(ObjectIdGetDatum(1)); }},
+                };
+
+                ApplyFillers(AllPgLanguageFillers, Y_ARRAY_SIZE(AllPgLanguageFillers), PgLanguageFillers_);
+            }            
         } else {
             if (Table_ == "tables") {
                 static const std::pair<const char*, TTablesFiller> AllTablesFillers[] = {
@@ -701,6 +729,42 @@ public:
 
                     rows.emplace_back(row);
                 }
+            } else if (Table_ == "pg_proc") {
+                NPg::EnumProc([&](ui32, const NPg::TProcDesc& desc) {
+                    NUdf::TUnboxedValue* items;
+                    auto row = compCtx.HolderFactory.CreateDirectArrayHolder(PgProcFillers_.size(), items);
+                    for (ui32 i = 0; i < PgProcFillers_.size(); ++i) {
+                        if (PgProcFillers_[i]) {
+                            items[i] = PgProcFillers_[i](desc);
+                        }
+                    }
+
+                    rows.emplace_back(row);
+                });
+            } else if (Table_ == "pg_aggregate") {
+                NPg::EnumAggregation([&](ui32, const NPg::TAggregateDesc& desc) {
+                    NUdf::TUnboxedValue* items;
+                    auto row = compCtx.HolderFactory.CreateDirectArrayHolder(PgAggregateFillers_.size(), items);
+                    for (ui32 i = 0; i < PgAggregateFillers_.size(); ++i) {
+                        if (PgAggregateFillers_[i]) {
+                            items[i] = PgAggregateFillers_[i](desc);
+                        }
+                    }
+
+                    rows.emplace_back(row);
+                });
+            } else if (Table_ == "pg_language") {
+                NPg::EnumLanguages([&](ui32, const NPg::TLanguageDesc& desc) {
+                    NUdf::TUnboxedValue* items;
+                    auto row = compCtx.HolderFactory.CreateDirectArrayHolder(PgLanguageFillers_.size(), items);
+                    for (ui32 i = 0; i < PgLanguageFillers_.size(); ++i) {
+                        if (PgLanguageFillers_[i]) {
+                            items[i] = PgLanguageFillers_[i](desc);
+                        }
+                    }
+
+                    rows.emplace_back(row);
+                });
             }
         } else {
             if (Table_ == "tables") {
@@ -783,6 +847,15 @@ private:
 
     using TPgClassFiller = NUdf::TUnboxedValuePod(*)(const NPg::TTableInfo&, ui32 namespaceOid, ui32 amOid);
     TVector<TPgClassFiller> PgClassFillers_;
+
+    using TPgProcFiller = NUdf::TUnboxedValuePod(*)(const NPg::TProcDesc&); // Produces one pg_proc cell from a proc descriptor.
+    TVector<TPgProcFiller> PgProcFillers_; // Slot i fills item i of each pg_proc row; null slots are skipped when rows are built.
+
+    using TPgAggregateFiller = NUdf::TUnboxedValuePod(*)(const NPg::TAggregateDesc&); // Produces one pg_aggregate cell from an aggregate descriptor.
+    TVector<TPgAggregateFiller> PgAggregateFillers_; // Slot i fills item i of each pg_aggregate row; null slots are skipped.
+
+    using TPgLanguageFiller = NUdf::TUnboxedValuePod(*)(const NPg::TLanguageDesc&); // Produces one pg_language cell from a language descriptor.
+    TVector<TPgLanguageFiller> PgLanguageFillers_; // Slot i fills item i of each pg_language row; null slots are skipped.
 };
 
 class TFunctionCallInfo {

+ 8 - 8
ydb/library/yql/parser/pg_wrapper/generate_kernels.py

@@ -45,7 +45,7 @@ def main():
     with open("pg_sources.inc") as f:
         for line in f:
             pg_sources.append(line.rstrip())
-    with open("../../../../../yql/tools/pg_catalog_dump/dump.json") as f:
+    with open("../../tools/pg_catalog_dump/dump.json") as f:
         catalog = json.load(f)
     catalog_by_oid = {}
     catalog_funcs = set()
@@ -56,7 +56,7 @@ def main():
     for agg in catalog["aggregation"]:
         if not agg["combine_func_id"]:
             continue
-        catalog_aggs_by_id[agg["internal_id"]] = agg
+        catalog_aggs_by_id[agg["agg_id"]] = agg
         assert len(agg["args"]) <= 2
 
     funcs={}
@@ -290,9 +290,9 @@ def main():
                 "    const std::vector<ui32>& argsColumns,\n" \
                 "    const TTypeEnvironment& env) const final {\n" \
                 "    const auto& aggDesc = ResolveAggregation(\"NAME\", tupleType, argsColumns, nullptr);\n" \
-                "    switch (aggDesc.InternalId) {\n" +
+                "    switch (aggDesc.AggId) {\n" +
                 "".join(["    case " + str(agg_id) + ": return MakePgAgg_NAME_" + str(agg_id) + "().PrepareCombineAll(filterColumn, argsColumns, aggDesc);\n" for agg_id in agg_names[name]]) +
-                "    default: throw yexception() << \"Unsupported agg id: \" << aggDesc.InternalId;\n" \
+                "    default: throw yexception() << \"Unsupported agg id: \" << aggDesc.AggId;\n" \
                 "    }\n" \
                 "}\n" \
                 "\n" \
@@ -301,9 +301,9 @@ def main():
                 "    const std::vector<ui32>& argsColumns,\n" \
                 "    const TTypeEnvironment& env) const final {\n" \
                 "    const auto& aggDesc = ResolveAggregation(\"NAME\", tupleType, argsColumns, nullptr);\n"                
-                "    switch (aggDesc.InternalId) {\n" +
+                "    switch (aggDesc.AggId) {\n" +
                 "".join(["    case " + str(agg_id) + ": return MakePgAgg_NAME_" + str(agg_id) + "().PrepareCombineKeys(argsColumns, aggDesc);\n" for agg_id in agg_names[name]]) +
-                "    default: throw yexception() << \"Unsupported agg id: \" << aggDesc.InternalId;\n" \
+                "    default: throw yexception() << \"Unsupported agg id: \" << aggDesc.AggId;\n" \
                 "    }\n" \
                 "}\n" \
                 "\n" \
@@ -313,9 +313,9 @@ def main():
                 "    const TTypeEnvironment& env,\n" \
                 "    TType* returnType) const final {\n" \
                 "    const auto& aggDesc = ResolveAggregation(\"NAME\", tupleType, argsColumns, returnType);\n"
-                "    switch (aggDesc.InternalId) {\n" +
+                "    switch (aggDesc.AggId) {\n" +
                 "".join(["    case " + str(agg_id) + ": return MakePgAgg_NAME_" + str(agg_id) + "().PrepareFinalizeKeys(argsColumns.front(), aggDesc);\n" for agg_id in agg_names[name]]) +
-                "    default: throw yexception() << \"Unsupported agg id: \" << aggDesc.InternalId;\n" \
+                "    default: throw yexception() << \"Unsupported agg id: \" << aggDesc.AggId;\n" \
                 "    }\n" \
                 "}\n" \
                 "};\n").replace("NAME", name))

File diff suppressed because it is too large
+ 293 - 243
ydb/library/yql/parser/pg_wrapper/pg_aggs.inc


File diff suppressed because it is too large
+ 245 - 225
ydb/library/yql/parser/pg_wrapper/pg_aggs.slow.inc


File diff suppressed because it is too large
+ 324 - 324
ydb/library/yql/parser/pg_wrapper/pg_aggs_register.inc


+ 1 - 1
ydb/library/yql/parser/pg_wrapper/postgresql/src/backend/utils/cache/syscache.c

@@ -1385,7 +1385,7 @@ SearchSysCacheCopyAttNum(Oid relid, int16 attnum)
  * a different cache for the same catalog the tuple was fetched from.
  */
 Datum
-SysCacheGetAttr(int cacheId, HeapTuple tup,
+SysCacheGetAttr_original(int cacheId, HeapTuple tup,
 				AttrNumber attributeNumber,
 				bool *isNull)
 {

+ 25 - 0
ydb/library/yql/parser/pg_wrapper/postgresql/src/include/catalog/pg_language.dat

@@ -0,0 +1,25 @@
+#----------------------------------------------------------------------
+#
+# pg_language.dat
+#    Initial contents of the pg_language system catalog.
+#
+# Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+# Portions Copyright (c) 1994, Regents of the University of California
+#
+# src/include/catalog/pg_language.dat
+#
+#----------------------------------------------------------------------
+
+[
+
+{ oid => '12', oid_symbol => 'INTERNALlanguageId',
+  descr => 'built-in functions',
+  lanname => 'internal', lanvalidator => 'fmgr_internal_validator' },
+{ oid => '13', oid_symbol => 'ClanguageId',
+  descr => 'dynamically-loaded C functions',
+  lanname => 'c', lanvalidator => 'fmgr_c_validator' },
+{ oid => '14', oid_symbol => 'SQLlanguageId',
+  descr => 'SQL-language functions',
+  lanname => 'sql', lanpltrusted => 't', lanvalidator => 'fmgr_sql_validator' },
+
+]

Some files were not shown because too many files changed in this diff