mkql_value_builder.cpp 13 KB


  1. #include "mkql_value_builder.h"
  2. #include "mkql_validate.h"
  3. #include <yql/essentials/minikql/mkql_node_cast.h>
  4. #include <yql/essentials/minikql/mkql_string_util.h>
  5. #include <yql/essentials/minikql/mkql_type_builder.h>
  6. #include <yql/essentials/parser/pg_wrapper/interface/utils.h>
  7. #include <library/cpp/yson/node/node_io.h>
  8. #include <arrow/chunked_array.h>
  9. #include <arrow/array/array_base.h>
  10. #include <arrow/array/util.h>
  11. #include <arrow/c/bridge.h>
  12. #include <util/system/env.h>
  13. namespace NKikimr {
  14. namespace NMiniKQL {
  15. ///////////////////////////////////////////////////////////////////////////////
  16. // TDefaultValueBuilder
  17. ///////////////////////////////////////////////////////////////////////////////
  18. TDefaultValueBuilder::TDefaultValueBuilder(const THolderFactory& holderFactory, NUdf::EValidatePolicy policy)
  19. : HolderFactory_(holderFactory)
  20. , Policy_(policy)
  21. , PgBuilder_(NYql::CreatePgBuilder())
  22. {}
  23. void TDefaultValueBuilder::SetSecureParamsProvider(const NUdf::ISecureParamsProvider* provider) {
  24. SecureParamsProvider_ = provider;
  25. }
  26. void TDefaultValueBuilder::RethrowAtTerminate() {
  27. Rethrow_ = true;
  28. }
  29. void TDefaultValueBuilder::SetCalleePositionHolder(const NUdf::TSourcePosition*& position) {
  30. CalleePositionPtr_ = &position;
  31. }
  32. void TDefaultValueBuilder::Terminate(const char* message) const {
  33. TStringBuf reason = (message ? TStringBuf(message) : TStringBuf("(unknown)"));
  34. TString fullMessage = TStringBuilder() <<
  35. "Terminate was called, reason(" << reason.size() << "): " << reason << Endl;
  36. HolderFactory_.CleanupModulesOnTerminate();
  37. if (Policy_ == NUdf::EValidatePolicy::Exception) {
  38. if (Rethrow_ && std::current_exception()) {
  39. throw;
  40. }
  41. Rethrow_ = true;
  42. throw TTerminateException() << fullMessage;
  43. }
  44. Cerr << fullMessage << Flush;
  45. _exit(1);
  46. }
  47. NUdf::TUnboxedValue TDefaultValueBuilder::NewStringNotFilled(ui32 size) const
  48. {
  49. return MakeStringNotFilled(size);
  50. }
  51. NUdf::TUnboxedValue TDefaultValueBuilder::NewString(const NUdf::TStringRef& ref) const
  52. {
  53. return MakeString(ref);
  54. }
  55. NUdf::TUnboxedValue TDefaultValueBuilder::ConcatStrings(NUdf::TUnboxedValuePod first, NUdf::TUnboxedValuePod second) const
  56. {
  57. return ::NKikimr::NMiniKQL::ConcatStrings(first, second);
  58. }
  59. NUdf::TUnboxedValue TDefaultValueBuilder::AppendString(NUdf::TUnboxedValuePod value, const NUdf::TStringRef& ref) const
  60. {
  61. return ::NKikimr::NMiniKQL::AppendString(value, ref);
  62. }
  63. NUdf::TUnboxedValue TDefaultValueBuilder::PrependString(const NUdf::TStringRef& ref, NUdf::TUnboxedValuePod value) const
  64. {
  65. return ::NKikimr::NMiniKQL::PrependString(ref, value);
  66. }
  67. NUdf::TUnboxedValue TDefaultValueBuilder::SubString(NUdf::TUnboxedValuePod value, ui32 offset, ui32 size) const
  68. {
  69. return ::NKikimr::NMiniKQL::SubString(value, offset, size);
  70. }
  71. NUdf::TUnboxedValue TDefaultValueBuilder::NewList(NUdf::TUnboxedValue* items, ui64 count) const {
  72. if (items == nullptr || count == 0) {
  73. return HolderFactory_.GetEmptyContainerLazy();
  74. }
  75. NUdf::TUnboxedValue* inplace = nullptr;
  76. auto array = HolderFactory_.CreateDirectArrayHolder(count, inplace);
  77. std::copy_n(std::make_move_iterator(items), count, inplace);
  78. return array;
  79. }
  80. NUdf::TUnboxedValue TDefaultValueBuilder::ReverseList(const NUdf::TUnboxedValuePod& list) const
  81. {
  82. return HolderFactory_.ReverseList(this, list);
  83. }
  84. NUdf::TUnboxedValue TDefaultValueBuilder::SkipList(const NUdf::TUnboxedValuePod& list, ui64 count) const
  85. {
  86. return HolderFactory_.SkipList(this, list, count);
  87. }
  88. NUdf::TUnboxedValue TDefaultValueBuilder::TakeList(const NUdf::TUnboxedValuePod& list, ui64 count) const
  89. {
  90. return HolderFactory_.TakeList(this, list, count);
  91. }
  92. NUdf::TUnboxedValue TDefaultValueBuilder::ToIndexDict(const NUdf::TUnboxedValuePod& list) const
  93. {
  94. return HolderFactory_.ToIndexDict(this, list);
  95. }
  96. NUdf::TUnboxedValue TDefaultValueBuilder::NewArray32(ui32 count, NUdf::TUnboxedValue*& itemsPtr) const {
  97. return HolderFactory_.CreateDirectArrayHolder(count, itemsPtr);
  98. }
  99. NUdf::TUnboxedValue TDefaultValueBuilder::NewArray64(ui64 count, NUdf::TUnboxedValue*& itemsPtr) const {
  100. return HolderFactory_.CreateDirectArrayHolder(count, itemsPtr);
  101. }
  102. NUdf::TUnboxedValue TDefaultValueBuilder::NewVariant(ui32 index, NUdf::TUnboxedValue&& value) const {
  103. return HolderFactory_.CreateVariantHolder(value.Release(), index);
  104. }
  105. NUdf::IDictValueBuilder::TPtr TDefaultValueBuilder::NewDict(const NUdf::TType* dictType, ui32 flags) const
  106. {
  107. return HolderFactory_.NewDict(dictType, flags);
  108. }
  109. bool TDefaultValueBuilder::MakeDate(ui32 year, ui32 month, ui32 day, ui16& value) const {
  110. return ::NKikimr::NMiniKQL::MakeDate(year, month, day, value);
  111. }
  112. bool TDefaultValueBuilder::SplitDate(ui16 value, ui32& year, ui32& month, ui32& day) const {
  113. return ::NKikimr::NMiniKQL::SplitDate(value, year, month, day);
  114. }
  115. bool TDefaultValueBuilder::MakeDatetime(ui32 year, ui32 month, ui32 day, ui32 hour, ui32 minute, ui32 second, ui32& value, ui16 tzId) const
  116. {
  117. return ::NKikimr::NMiniKQL::MakeTzDatetime(year, month, day, hour, minute, second, value, tzId);
  118. }
  119. bool TDefaultValueBuilder::SplitDatetime(ui32 value, ui32& year, ui32& month, ui32& day, ui32& hour, ui32& minute, ui32& second, ui16 tzId) const
  120. {
  121. return ::NKikimr::NMiniKQL::SplitTzDatetime(value, year, month, day, hour, minute, second, tzId);
  122. }
  123. bool TDefaultValueBuilder::FullSplitDate(ui16 value, ui32& year, ui32& month, ui32& day,
  124. ui32& dayOfYear, ui32& weekOfYear, ui32& dayOfWeek, ui16 tzId) const {
  125. ui32 unusedWeekOfYearIso8601 = 0;
  126. return ::NKikimr::NMiniKQL::SplitTzDate(value, year, month, day, dayOfYear, weekOfYear, unusedWeekOfYearIso8601, dayOfWeek, tzId);
  127. }
  128. bool TDefaultValueBuilder::FullSplitDate2(ui16 value, ui32& year, ui32& month, ui32& day,
  129. ui32& dayOfYear, ui32& weekOfYear, ui32& weekOfYearIso8601, ui32& dayOfWeek, ui16 tzId) const {
  130. return ::NKikimr::NMiniKQL::SplitTzDate(value, year, month, day, dayOfYear, weekOfYear, weekOfYearIso8601, dayOfWeek, tzId);
  131. }
  132. bool TDefaultValueBuilder::FullSplitDatetime(ui32 value, ui32& year, ui32& month, ui32& day, ui32& hour, ui32& minute, ui32& second,
  133. ui32& dayOfYear, ui32& weekOfYear, ui32& dayOfWeek, ui16 tzId) const {
  134. ui32 unusedWeekOfYearIso8601 = 0;
  135. return ::NKikimr::NMiniKQL::SplitTzDatetime(value, year, month, day, hour, minute, second, dayOfYear, weekOfYear, unusedWeekOfYearIso8601, dayOfWeek, tzId);
  136. }
  137. bool TDefaultValueBuilder::FullSplitDatetime2(ui32 value, ui32& year, ui32& month, ui32& day, ui32& hour, ui32& minute, ui32& second,
  138. ui32& dayOfYear, ui32& weekOfYear, ui32& weekOfYearIso8601, ui32& dayOfWeek, ui16 tzId) const {
  139. return ::NKikimr::NMiniKQL::SplitTzDatetime(value, year, month, day, hour, minute, second, dayOfYear, weekOfYear, weekOfYearIso8601, dayOfWeek, tzId);
  140. }
  141. bool TDefaultValueBuilder::EnrichDate(ui16 date, ui32& dayOfYear, ui32& weekOfYear, ui32& dayOfWeek) const {
  142. ui32 unusedWeekOfYearIso8601 = 0;
  143. return ::NKikimr::NMiniKQL::EnrichDate(date, dayOfYear, weekOfYear, unusedWeekOfYearIso8601, dayOfWeek);
  144. }
  145. bool TDefaultValueBuilder::EnrichDate2(ui16 date, ui32& dayOfYear, ui32& weekOfYear, ui32& weekOfYearIso8601, ui32& dayOfWeek) const {
  146. return ::NKikimr::NMiniKQL::EnrichDate(date, dayOfYear, weekOfYear, weekOfYearIso8601, dayOfWeek);
  147. }
  148. bool TDefaultValueBuilder::GetTimezoneShift(ui32 year, ui32 month, ui32 day, ui32 hour, ui32 minute, ui32 second, ui16 tzId, i32& value) const
  149. {
  150. return ::NKikimr::NMiniKQL::GetTimezoneShift(year, month, day, hour, minute, second, tzId, value);
  151. }
  152. const NUdf::TSourcePosition* TDefaultValueBuilder::CalleePosition() const {
  153. return *CalleePositionPtr_;
  154. }
  155. NUdf::TUnboxedValue TDefaultValueBuilder::Run(const NUdf::TSourcePosition& callee, const NUdf::IBoxedValue& value, const NUdf::TUnboxedValuePod* args) const {
  156. const auto prev = *CalleePositionPtr_;
  157. *CalleePositionPtr_ = &callee;
  158. const auto ret = NUdf::TBoxedValueAccessor::Run(value, this, args);
  159. *CalleePositionPtr_ = prev;
  160. return ret;
  161. }
  162. void TDefaultValueBuilder::ExportArrowBlock(NUdf::TUnboxedValuePod value, ui32 chunk, ArrowArray* out) const {
  163. const auto& datum = TArrowBlock::From(value).GetDatum();
  164. std::shared_ptr<arrow::Array> arr;
  165. if (datum.is_scalar()) {
  166. if (chunk != 0) {
  167. UdfTerminate("Bad chunk index");
  168. }
  169. auto arrRes = arrow::MakeArrayFromScalar(*datum.scalar(), 1);
  170. if (!arrRes.status().ok()) {
  171. UdfTerminate(arrRes.status().ToString().c_str());
  172. }
  173. arr = std::move(arrRes).ValueOrDie();
  174. } else if (datum.is_array()) {
  175. if (chunk != 0) {
  176. UdfTerminate("Bad chunk index");
  177. }
  178. arr = datum.make_array();
  179. } else if (datum.is_arraylike()) {
  180. const auto& chunks = datum.chunks();
  181. if (chunk >= chunks.size()) {
  182. UdfTerminate("Bad chunk index");
  183. }
  184. arr = chunks[chunk];
  185. } else {
  186. UdfTerminate("Unexpected kind of arrow::Datum");
  187. }
  188. auto status = arrow::ExportArray(*arr, out);
  189. if (!status.ok()) {
  190. UdfTerminate(status.ToString().c_str());
  191. }
  192. }
  193. NUdf::TUnboxedValue TDefaultValueBuilder::ImportArrowBlock(ArrowArray* arrays, ui32 chunkCount, bool isScalar, const NUdf::IArrowType& type) const {
  194. const auto dataType = static_cast<const TArrowType&>(type).GetType();
  195. if (isScalar) {
  196. if (chunkCount != 1) {
  197. UdfTerminate("Bad chunkCount value");
  198. }
  199. auto arrRes = arrow::ImportArray(arrays, dataType);
  200. auto arr = std::move(arrRes).ValueOrDie();
  201. if (arr->length() != 1) {
  202. UdfTerminate("Expected array with one element");
  203. }
  204. auto scalarRes = arr->GetScalar(0);
  205. if (!scalarRes.status().ok()) {
  206. UdfTerminate(scalarRes.status().ToString().c_str());
  207. }
  208. auto scalar = std::move(scalarRes).ValueOrDie();
  209. return HolderFactory_.CreateArrowBlock(std::move(scalar));
  210. } else {
  211. if (chunkCount < 1) {
  212. UdfTerminate("Bad chunkCount value");
  213. }
  214. TVector<std::shared_ptr<arrow::Array>> imported(chunkCount);
  215. for (ui32 i = 0; i < chunkCount; ++i) {
  216. auto arrRes = arrow::ImportArray(arrays + i, dataType);
  217. if (!arrRes.status().ok()) {
  218. UdfTerminate(arrRes.status().ToString().c_str());
  219. }
  220. imported[i] = std::move(arrRes).ValueOrDie();
  221. }
  222. if (chunkCount == 1) {
  223. return HolderFactory_.CreateArrowBlock(imported.front());
  224. } else {
  225. return HolderFactory_.CreateArrowBlock(arrow::ChunkedArray::Make(std::move(imported), dataType).ValueOrDie());
  226. }
  227. }
  228. }
  229. ui32 TDefaultValueBuilder::GetArrowBlockChunks(NUdf::TUnboxedValuePod value, bool& isScalar, ui64& length) const {
  230. const auto& datum = TArrowBlock::From(value).GetDatum();
  231. isScalar = false;
  232. length = datum.length();
  233. if (datum.is_scalar()) {
  234. isScalar = true;
  235. return 1;
  236. } else if (datum.is_array()) {
  237. return 1;
  238. } else if (datum.is_arraylike()) {
  239. return datum.chunks().size();
  240. } else {
  241. UdfTerminate("Unexpected kind of arrow::Datum");
  242. }
  243. }
  244. bool TDefaultValueBuilder::FindTimezoneName(ui32 id, NUdf::TStringRef& name) const {
  245. auto res = ::NKikimr::NMiniKQL::FindTimezoneIANAName(id);
  246. if (!res) {
  247. return false;
  248. }
  249. name = *res;
  250. return true;
  251. }
  252. bool TDefaultValueBuilder::FindTimezoneId(const NUdf::TStringRef& name, ui32& id) const {
  253. auto res = ::NKikimr::NMiniKQL::FindTimezoneId(name);
  254. if (!res) {
  255. return false;
  256. }
  257. id = *res;
  258. return true;
  259. }
  260. bool TDefaultValueBuilder::GetSecureParam(NUdf::TStringRef key, NUdf::TStringRef& value) const {
  261. if (SecureParamsProvider_)
  262. return SecureParamsProvider_->GetSecureParam(key, value);
  263. return false;
  264. }
  265. bool TDefaultValueBuilder::SplitTzDate32(i32 date, i32& year, ui32& month, ui32& day,
  266. ui32& dayOfYear, ui32& weekOfYear, ui32& weekOfYearIso8601, ui32& dayOfWeek, ui16 timezoneId) const
  267. {
  268. return ::NKikimr::NMiniKQL::SplitTzDate32(date, year, month, day, dayOfYear, weekOfYear, weekOfYearIso8601, dayOfWeek, timezoneId);
  269. }
  270. bool TDefaultValueBuilder::SplitTzDatetime64(i64 datetime, i32& year, ui32& month, ui32& day,
  271. ui32& hour, ui32& minute, ui32& second,
  272. ui32& dayOfYear, ui32& weekOfYear, ui32& weekOfYearIso8601, ui32& dayOfWeek, ui16 timezoneId) const
  273. {
  274. return ::NKikimr::NMiniKQL::SplitTzDatetime64(
  275. datetime, year, month, day, hour, minute, second,
  276. dayOfYear, weekOfYear, weekOfYearIso8601, dayOfWeek, timezoneId);
  277. }
  278. bool TDefaultValueBuilder::MakeTzDate32(i32 year, ui32 month, ui32 day, i32& date, ui16 timezoneId) const {
  279. return ::NKikimr::NMiniKQL::MakeTzDate32(year, month, day, date, timezoneId);
  280. }
  281. bool TDefaultValueBuilder::MakeTzDatetime64(i32 year, ui32 month, ui32 day,
  282. ui32 hour, ui32 minute, ui32 second, i64& datetime, ui16 timezoneId) const
  283. {
  284. return ::NKikimr::NMiniKQL::MakeTzDatetime64(year, month, day, hour, minute, second, datetime, timezoneId);
  285. }
  286. NUdf::IListValueBuilder::TPtr TDefaultValueBuilder::NewListBuilder() const {
  287. return HolderFactory_.NewList();
  288. }
  289. } // namespace NMiniKQL
} // namespace NKikimr