sql_query.h 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. #pragma once
  2. #include "resource.h"
  3. #include "compile_path.h"
  4. #include <yql/essentials/core/sql_types/yql_atom_enums.h>
  5. #include <yql/essentials/public/udf/udf_type_builder.h>
  6. #include <yql/essentials/public/udf/udf_value.h>
  7. #include <yql/essentials/public/udf/udf_helpers.h>
  8. #include <yql/essentials/minikql/dom/node.h>
  9. #include <util/generic/yexception.h>
  10. namespace NJson2Udf {
  11. using namespace NKikimr;
  12. using namespace NUdf;
  13. using namespace NYql;
  14. using namespace NDom;
  15. using namespace NJsonPath;
  16. template <EDataSlot InputType, EJsonQueryWrap Mode>
  17. class TSqlQuery: public TBoxedValue {
  18. public:
  19. explicit TSqlQuery(TSourcePosition pos)
  20. : Pos_(pos)
  21. {
  22. }
  23. static TStringRef Name();
  24. static bool DeclareSignature(
  25. const TStringRef& name,
  26. TType* userType,
  27. IFunctionTypeInfoBuilder& builder,
  28. bool typesOnly) {
  29. Y_UNUSED(userType);
  30. if (name != Name()) {
  31. return false;
  32. }
  33. auto jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME);
  34. auto optionalJsonType = builder.Optional()->Item(jsonType).Build();
  35. TType* inputType = nullptr;
  36. if constexpr (InputType == EDataSlot::JsonDocument) {
  37. inputType = builder.SimpleType<TJsonDocument>();
  38. } else {
  39. inputType = jsonType;
  40. }
  41. auto inputOptionalType = builder.Optional()->Item(inputType).Build();
  42. auto jsonPathType = builder.Resource(JSONPATH_RESOURCE_NAME);
  43. auto dictType = builder.Dict()->Key<TUtf8>().Value(jsonType).Build();
  44. /*
  45. Arguments:
  46. 0. Resource<JsonNode>? or JsonDocument?. Input json
  47. 1. Resource<JsonPath>. Jsonpath to execute on json
  48. 2. Dict<TUtf8, Resource<JsonNode>>. Variables to pass into jsonpath
  49. 3. Bool. True - throw on empty result, false otherwise
  50. 4. Resource<JsonNode>?. Default value to return on empty result. Ignored if 2d argument is true
  51. 5. Bool. True - throw on error, false - otherwise
  52. 6. Resource<JsonNode>?. Default value to return on error. Ignored if 4th argument is true
  53. */
  54. // we can't mark TSqlQuery as strict due to runtime throw policy setting
  55. // TODO: optimizer can mark SqlQuery as strict if 3th/5th arguments are literal booleans
  56. builder.Args()
  57. ->Add(inputOptionalType)
  58. .Add(jsonPathType)
  59. .Add(dictType)
  60. .Add<bool>()
  61. .Add(optionalJsonType)
  62. .Add<bool>()
  63. .Add(optionalJsonType)
  64. .Done()
  65. .Returns(optionalJsonType);
  66. if (!typesOnly) {
  67. builder.Implementation(new TSqlQuery(builder.GetSourcePosition()));
  68. }
  69. return true;
  70. }
  71. private:
  72. TUnboxedValue Run(
  73. const IValueBuilder* valueBuilder,
  74. const TUnboxedValuePod* args) const final {
  75. Y_UNUSED(valueBuilder);
  76. try {
  77. if (!args[0].HasValue()) {
  78. return TUnboxedValuePod();
  79. }
  80. TValue jsonDom;
  81. if constexpr (InputType == EDataSlot::JsonDocument) {
  82. jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor());
  83. } else {
  84. jsonDom = TValue(args[0]);
  85. }
  86. auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get());
  87. const auto& jsonPath = *jsonPathResource->Get();
  88. const bool throwOnEmpty = args[3].Get<bool>();
  89. const auto emptyDefault = args[4];
  90. const bool throwOnError = args[5].Get<bool>();
  91. const auto errorDefault = args[6];
  92. const auto variables = DictToVariables(args[2]);
  93. auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder);
  94. const auto handleCase = [](TStringBuf message, bool throws, const TUnboxedValuePod& caseDefault) {
  95. if (throws) {
  96. ythrow yexception() << message;
  97. }
  98. return caseDefault;
  99. };
  100. if (result.IsError()) {
  101. return handleCase(TStringBuilder() << "Error executing jsonpath:" << Endl << result.GetError() << Endl, throwOnError, errorDefault);
  102. }
  103. auto& nodes = result.GetNodes();
  104. const bool isSingleStruct = nodes.size() == 1 && (nodes[0].Is(EValueType::Array) || nodes[0].Is(EValueType::Object));
  105. if (Mode == EJsonQueryWrap::Wrap || (Mode == EJsonQueryWrap::ConditionalWrap && !isSingleStruct)) {
  106. TVector<TUnboxedValue> converted;
  107. converted.reserve(nodes.size());
  108. for (auto& node : nodes) {
  109. converted.push_back(node.ConvertToUnboxedValue(valueBuilder));
  110. }
  111. return MakeList(converted.data(), converted.size(), valueBuilder);
  112. }
  113. if (nodes.empty()) {
  114. return handleCase("Empty result", throwOnEmpty, emptyDefault);
  115. }
  116. // No wrapping is applicable and result is not empty. Result must be a single object or array
  117. if (nodes.size() > 1) {
  118. return handleCase("Result consists of multiple items", throwOnError, errorDefault);
  119. }
  120. if (!nodes[0].Is(EValueType::Array) && !nodes[0].Is(EValueType::Object)) {
  121. return handleCase("Result is neither object nor array", throwOnError, errorDefault);
  122. }
  123. return nodes[0].ConvertToUnboxedValue(valueBuilder);
  124. } catch (const std::exception& e) {
  125. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  126. }
  127. }
  128. TSourcePosition Pos_;
  129. };
  130. template <>
  131. TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::NoWrap>::Name() {
  132. return "SqlQuery";
  133. }
  134. template <>
  135. TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::Wrap>::Name() {
  136. return "SqlQueryWrap";
  137. }
  138. template <>
  139. TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::ConditionalWrap>::Name() {
  140. return "SqlQueryConditionalWrap";
  141. }
  142. template <>
  143. TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::NoWrap>::Name() {
  144. return "JsonDocumentSqlQuery";
  145. }
  146. template <>
  147. TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::Wrap>::Name() {
  148. return "JsonDocumentSqlQueryWrap";
  149. }
  150. template <>
  151. TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::ConditionalWrap>::Name() {
  152. return "JsonDocumentSqlQueryConditionalWrap";
  153. }
  154. }