datetime_udf.cpp 106 KB


  1. #include <yql/essentials/minikql/mkql_type_ops.h>
  2. #include <yql/essentials/public/udf/tz/udf_tz.h>
  3. #include <yql/essentials/public/udf/udf_helpers.h>
  4. #include <yql/essentials/minikql/datetime/datetime.h>
  5. #include <yql/essentials/minikql/datetime/datetime64.h>
  6. #include <yql/essentials/public/udf/arrow/udf_arrow_helpers.h>
  7. #include <util/datetime/base.h>
  8. using namespace NKikimr;
  9. using namespace NUdf;
  10. using namespace NYql::DateTime;
  11. extern const char SplitUDF[] = "Split";
  12. extern const char ToSecondsUDF[] = "ToSeconds";
  13. extern const char ToMillisecondsUDF[] = "ToMilliseconds";
  14. extern const char ToMicrosecondsUDF[] = "ToMicroseconds";
  15. extern const char GetYearUDF[] = "GetYear";
  16. extern const char GetDayOfYearUDF[] = "GetDayOfYear";
  17. extern const char GetMonthUDF[] = "GetMonth";
  18. extern const char GetMonthNameUDF[] = "GetMonthName";
  19. extern const char GetWeekOfYearUDF[] = "GetWeekOfYear";
  20. extern const char GetWeekOfYearIso8601UDF[] = "GetWeekOfYearIso8601";
  21. extern const char GetDayOfMonthUDF[] = "GetDayOfMonth";
  22. extern const char GetDayOfWeekUDF[] = "GetDayOfWeek";
  23. extern const char GetDayOfWeekNameUDF[] = "GetDayOfWeekName";
  24. extern const char GetTimezoneIdUDF[] = "GetTimezoneId";
  25. extern const char GetTimezoneNameUDF[] = "GetTimezoneName";
  26. extern const char GetHourUDF[] = "GetHour";
  27. extern const char GetMinuteUDF[] = "GetMinute";
  28. extern const char GetSecondUDF[] = "GetSecond";
  29. extern const char GetMillisecondOfSecondUDF[] = "GetMillisecondOfSecond";
  30. extern const char GetMicrosecondOfSecondUDF[] = "GetMicrosecondOfSecond";
  31. extern const char TMResourceName[] = "DateTime2.TM";
  32. extern const char TM64ResourceName[] = "DateTime2.TM64";
  33. const auto UsecondsInDay = 86400000000ll;
  34. const auto UsecondsInHour = 3600000000ll;
  35. const auto UsecondsInMinute = 60000000ll;
  36. const auto UsecondsInSecond = 1000000ll;
  37. const auto UsecondsInMilliseconds = 1000ll;
  38. template <const char* TFuncName, typename TResult, ui32 ScaleAfterSeconds>
  39. class TToUnits {
  40. public:
  41. typedef bool TTypeAwareMarker;
  42. using TSignedResult = typename std::make_signed<TResult>::type;
  43. static TResult DateCore(ui16 value) {
  44. return value * ui32(86400) * TResult(ScaleAfterSeconds);
  45. }
  46. template<typename TTzDate>
  47. static TResult TzBlockCore(TBlockItem tzDate);
  48. template<>
  49. static TResult TzBlockCore<TTzDate>(TBlockItem tzDate) {
  50. return DateCore(tzDate.Get<ui16>());
  51. }
  52. template<>
  53. static TResult TzBlockCore<TTzDatetime>(TBlockItem tzDate) {
  54. return DatetimeCore(tzDate.Get<ui32>());
  55. }
  56. template<>
  57. static TResult TzBlockCore<TTzTimestamp>(TBlockItem tzDate) {
  58. return TimestampCore(tzDate.Get<ui64>());
  59. }
  60. static TResult DatetimeCore(ui32 value) {
  61. return value * TResult(ScaleAfterSeconds);
  62. }
  63. static TResult TimestampCore(ui64 value) {
  64. return TResult(value / (1000000u / ScaleAfterSeconds));
  65. }
  66. static TSignedResult IntervalCore(i64 value) {
  67. return TSignedResult(value / (1000000u / ScaleAfterSeconds));
  68. }
  69. static const TStringRef& Name() {
  70. static auto name = TStringRef(TFuncName, std::strlen(TFuncName));
  71. return name;
  72. }
  73. template<typename TTzDate, typename TOutput>
  74. static auto MakeTzBlockExec() {
  75. using TReader = TTzDateBlockReader<TTzDate, /*Nullable*/ false>;
  76. return UnaryPreallocatedReaderExecImpl<TReader, TOutput, TzBlockCore<TTzDate>>;
  77. }
  78. static bool DeclareSignature(
  79. const TStringRef& name,
  80. TType* userType,
  81. IFunctionTypeInfoBuilder& builder,
  82. bool typesOnly)
  83. {
  84. if (Name() != name) {
  85. return false;
  86. }
  87. try {
  88. auto typeInfoHelper = builder.TypeInfoHelper();
  89. TTupleTypeInspector tuple(*typeInfoHelper, userType);
  90. Y_ENSURE(tuple);
  91. Y_ENSURE(tuple.GetElementsCount() > 0);
  92. TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0));
  93. Y_ENSURE(argsTuple);
  94. if (argsTuple.GetElementsCount() != 1) {
  95. builder.SetError("Expected one argument");
  96. return true;
  97. }
  98. auto argType = argsTuple.GetElementType(0);
  99. TVector<const TType*> argBlockTypes;
  100. argBlockTypes.push_back(argType);
  101. TBlockTypeInspector block(*typeInfoHelper, argType);
  102. if (block) {
  103. Y_ENSURE(!block.IsScalar());
  104. argType = block.GetItemType();
  105. }
  106. bool isOptional = false;
  107. if (auto opt = TOptionalTypeInspector(*typeInfoHelper, argType)) {
  108. argType = opt.GetItemType();
  109. isOptional = true;
  110. }
  111. TDataTypeInspector data(*typeInfoHelper, argType);
  112. if (!data) {
  113. builder.SetError("Expected data type");
  114. return true;
  115. }
  116. auto typeId = data.GetTypeId();
  117. if (!(typeId == TDataType<TDate>::Id || typeId == TDataType<TTzDate>::Id ||
  118. typeId == TDataType<TDatetime>::Id || typeId == TDataType<TTzDatetime>::Id ||
  119. typeId == TDataType<TTimestamp>::Id || typeId == TDataType<TTzTimestamp>::Id ||
  120. typeId == TDataType<TInterval>::Id)) {
  121. builder.SetError(TStringBuilder() << "Type " << GetDataTypeInfo(GetDataSlot(typeId)).Name << " is not supported");
  122. }
  123. builder.Args()->Add(argsTuple.GetElementType(0)).Done();
  124. const TType* retType;
  125. if (typeId != TDataType<TInterval>::Id) {
  126. retType = builder.SimpleType<TResult>();
  127. } else {
  128. retType = builder.SimpleType<TSignedResult>();
  129. }
  130. if (isOptional) {
  131. retType = builder.Optional()->Item(retType).Build();
  132. }
  133. auto outputType = retType;
  134. if (block) {
  135. retType = builder.Block(block.IsScalar())->Item(retType).Build();
  136. }
  137. builder.Returns(retType);
  138. builder.SupportsBlocks();
  139. builder.IsStrict();
  140. builder.UserType(userType);
  141. if (!typesOnly) {
  142. if (typeId == TDataType<TDate>::Id || typeId == TDataType<TTzDate>::Id) {
  143. if (block) {
  144. const auto exec = (typeId == TDataType<TTzDate>::Id)
  145. ? MakeTzBlockExec<TTzDate, TResult>()
  146. : UnaryPreallocatedExecImpl<ui16, TResult, DateCore>;
  147. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  148. exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  149. } else {
  150. builder.Implementation(new TUnaryOverOptionalImpl<ui16, TResult, DateCore>());
  151. }
  152. }
  153. if (typeId == TDataType<TDatetime>::Id || typeId == TDataType<TTzDatetime>::Id) {
  154. if (block) {
  155. const auto exec = (typeId == TDataType<TTzDatetime>::Id)
  156. ? MakeTzBlockExec<TTzDatetime, TResult>()
  157. : UnaryPreallocatedExecImpl<ui32, TResult, DatetimeCore>;
  158. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  159. exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  160. } else {
  161. builder.Implementation(new TUnaryOverOptionalImpl<ui32, TResult, DatetimeCore>());
  162. }
  163. }
  164. if (typeId == TDataType<TTimestamp>::Id || typeId == TDataType<TTzTimestamp>::Id) {
  165. if (block) {
  166. const auto exec = (typeId == TDataType<TTzTimestamp>::Id)
  167. ? MakeTzBlockExec<TTzTimestamp, TResult>()
  168. : UnaryPreallocatedExecImpl<ui64, TResult, TimestampCore>;
  169. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  170. exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  171. } else {
  172. builder.Implementation(new TUnaryOverOptionalImpl<ui64, TResult, TimestampCore>());
  173. }
  174. }
  175. if (typeId == TDataType<TInterval>::Id) {
  176. if (block) {
  177. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  178. UnaryPreallocatedExecImpl<i64, TSignedResult, IntervalCore>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  179. } else {
  180. builder.Implementation(new TUnaryOverOptionalImpl<i64, TSignedResult, IntervalCore>());
  181. }
  182. }
  183. }
  184. } catch (const std::exception& e) {
  185. builder.SetError(TStringBuf(e.what()));
  186. }
  187. return true;
  188. }
  189. };
  190. template <const char* TFuncName, typename TFieldStorage, TFieldStorage (*FieldFunc)(const TUnboxedValuePod&), ui32 Divisor, ui32 Scale, ui32 Limit, bool Fractional>
  191. struct TGetTimeComponent {
  192. typedef bool TTypeAwareMarker;
  193. template <typename TInput, bool AlwaysZero, bool InputFractional>
  194. static TFieldStorage Core(TInput val) {
  195. if constexpr (AlwaysZero) {
  196. return 0;
  197. }
  198. if constexpr (InputFractional) {
  199. if constexpr (Fractional) {
  200. return (val / Scale) % Limit;
  201. } else {
  202. return (val / 1000000u / Scale) % Limit;
  203. }
  204. } else {
  205. if constexpr (Fractional) {
  206. return 0;
  207. } else {
  208. return (val / Scale) % Limit;
  209. }
  210. }
  211. }
  212. class TImpl : public TBoxedValue {
  213. public:
  214. TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final {
  215. Y_UNUSED(valueBuilder);
  216. if (!args[0]) {
  217. return {};
  218. }
  219. return TUnboxedValuePod(TFieldStorage((FieldFunc(args[0])) / Divisor));
  220. }
  221. };
  222. static const TStringRef& Name() {
  223. static auto name = TStringRef(TFuncName, std::strlen(TFuncName));
  224. return name;
  225. }
  226. static bool DeclareSignature(
  227. const TStringRef& name,
  228. TType* userType,
  229. IFunctionTypeInfoBuilder& builder,
  230. bool typesOnly)
  231. {
  232. if (Name() != name) {
  233. return false;
  234. }
  235. try {
  236. auto typeInfoHelper = builder.TypeInfoHelper();
  237. TTupleTypeInspector tuple(*typeInfoHelper, userType);
  238. if (tuple) {
  239. Y_ENSURE(tuple.GetElementsCount() > 0);
  240. TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0));
  241. Y_ENSURE(argsTuple);
  242. if (argsTuple.GetElementsCount() != 1) {
  243. builder.SetError("Expected one argument");
  244. return true;
  245. }
  246. auto argType = argsTuple.GetElementType(0);
  247. TVector<const TType*> argBlockTypes;
  248. argBlockTypes.push_back(argType);
  249. TBlockTypeInspector block(*typeInfoHelper, argType);
  250. if (block) {
  251. Y_ENSURE(!block.IsScalar());
  252. argType = block.GetItemType();
  253. }
  254. bool isOptional = false;
  255. if (auto opt = TOptionalTypeInspector(*typeInfoHelper, argType)) {
  256. argType = opt.GetItemType();
  257. isOptional = true;
  258. }
  259. TResourceTypeInspector res(*typeInfoHelper, argType);
  260. if (!res) {
  261. TDataTypeInspector data(*typeInfoHelper, argType);
  262. if (!data) {
  263. builder.SetError("Expected data type");
  264. return true;
  265. }
  266. auto typeId = data.GetTypeId();
  267. if (typeId == TDataType<TDate>::Id ||
  268. typeId == TDataType<TDatetime>::Id ||
  269. typeId == TDataType<TTimestamp>::Id) {
  270. builder.Args()->Add(argsTuple.GetElementType(0)).Done();
  271. const TType* retType = builder.SimpleType<TFieldStorage>();
  272. if (isOptional) {
  273. retType = builder.Optional()->Item(retType).Build();
  274. }
  275. auto outputType = retType;
  276. if (block) {
  277. retType = builder.Block(block.IsScalar())->Item(retType).Build();
  278. }
  279. builder.Returns(retType);
  280. builder.SupportsBlocks();
  281. builder.IsStrict();
  282. builder.UserType(userType);
  283. if (!typesOnly) {
  284. if (typeId == TDataType<TDate>::Id) {
  285. if (block) {
  286. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  287. UnaryPreallocatedExecImpl<ui16, TFieldStorage, Core<ui16, true, false>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  288. } else {
  289. builder.Implementation(new TUnaryOverOptionalImpl<ui16, TFieldStorage, Core<ui16, true, false>>());
  290. }
  291. }
  292. if (typeId == TDataType<TDatetime>::Id) {
  293. if (block) {
  294. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  295. UnaryPreallocatedExecImpl<ui32, TFieldStorage, Core<ui32, false, false>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  296. } else {
  297. builder.Implementation(new TUnaryOverOptionalImpl<ui32, TFieldStorage, Core<ui32, false, false>>());
  298. }
  299. }
  300. if (typeId == TDataType<TTimestamp>::Id) {
  301. if (block) {
  302. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  303. UnaryPreallocatedExecImpl<ui64, TFieldStorage, Core<ui64, false, true>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  304. } else {
  305. builder.Implementation(new TUnaryOverOptionalImpl<ui64, TFieldStorage, Core<ui64, false, true>>());
  306. }
  307. }
  308. }
  309. return true;
  310. }
  311. } else {
  312. Y_ENSURE(!block);
  313. if (res.GetTag() != TStringRef::Of(TMResourceName)) {
  314. builder.SetError("Unexpected resource tag");
  315. return true;
  316. }
  317. }
  318. }
  319. // default implementation
  320. builder.Args()->Add<TResource<TMResourceName>>().Flags(ICallablePayload::TArgumentFlags::AutoMap).Done();
  321. builder.Returns<TFieldStorage>();
  322. builder.IsStrict();
  323. if (!typesOnly) {
  324. builder.Implementation(new TImpl());
  325. }
  326. } catch (const std::exception& e) {
  327. builder.SetError(TStringBuf(e.what()));
  328. }
  329. return true;
  330. }
  331. };
  332. namespace {
  333. const TTMStorage& Reference(const NUdf::TUnboxedValuePod& value) {
  334. return *reinterpret_cast<const TTMStorage*>(value.GetRawPtr());
  335. }
  336. TTMStorage& Reference(NUdf::TUnboxedValuePod& value) {
  337. return *reinterpret_cast<TTMStorage*>(value.GetRawPtr());
  338. }
  339. const TTMStorage& Reference(const TBlockItem& value) {
  340. return *reinterpret_cast<const TTMStorage*>(value.GetRawPtr());
  341. }
  342. Y_DECLARE_UNUSED TTMStorage& Reference(TBlockItem& value) {
  343. return *reinterpret_cast<TTMStorage*>(value.GetRawPtr());
  344. }
  345. const TTM64Storage& Reference64(const NUdf::TUnboxedValuePod& value) {
  346. return *reinterpret_cast<const TTM64Storage*>(value.GetRawPtr());
  347. }
  348. TTM64Storage& Reference64(NUdf::TUnboxedValuePod& value) {
  349. return *reinterpret_cast<TTM64Storage*>(value.GetRawPtr());
  350. }
  351. template<typename TValue>
  352. TValue DoAddMonths(const TValue& date, i64 months, const NUdf::IDateBuilder& builder) {
  353. auto result = date;
  354. auto& storage = Reference(result);
  355. if (!NYql::DateTime::DoAddMonths(storage, months, builder)) {
  356. return TValue{};
  357. }
  358. return result;
  359. }
  360. template<typename TValue>
  361. TValue DoAddQuarters(const TValue& date, i64 quarters, const NUdf::IDateBuilder& builder) {
  362. return DoAddMonths(date, quarters * 3ll, builder);
  363. }
  364. template<typename TValue>
  365. TValue DoAddYears(const TValue& date, i64 years, const NUdf::IDateBuilder& builder) {
  366. auto result = date;
  367. auto& storage = Reference(result);
  368. if (!NYql::DateTime::DoAddYears(storage, years, builder)) {
  369. return TValue{};
  370. }
  371. return result;
  372. }
  373. #define ACCESSORS_POLY(field, type, wtype) \
  374. template<typename TValue> \
  375. inline type Get##field(const TValue& tm) { \
  376. return (type)Reference(tm).field; \
  377. } \
  378. template<typename TValue> \
  379. inline wtype GetW##field(const TValue& tm) { \
  380. return (wtype)Reference64(tm).field; \
  381. } \
  382. template<typename TValue> \
  383. inline void Set##field(TValue& tm, type value) { \
  384. Reference(tm).field = value; \
  385. } \
  386. #define ACCESSORS(field, type) \
  387. ACCESSORS_POLY(field, type, type)
  388. ACCESSORS_POLY(Year, ui16, i32)
  389. ACCESSORS(DayOfYear, ui16)
  390. ACCESSORS(WeekOfYear, ui8)
  391. ACCESSORS(WeekOfYearIso8601, ui8)
  392. ACCESSORS(DayOfWeek, ui8)
  393. ACCESSORS(Month, ui8)
  394. ACCESSORS(Day, ui8)
  395. ACCESSORS(Hour, ui8)
  396. ACCESSORS(Minute, ui8)
  397. ACCESSORS(Second, ui8)
  398. ACCESSORS(Microsecond, ui32)
  399. ACCESSORS(TimezoneId, ui16)
  400. #undef ACCESSORS
  401. #undef ACCESSORS_POLY
  402. inline bool ValidateYear(ui16 year) {
  403. return year >= NUdf::MIN_YEAR - 1 || year <= NUdf::MAX_YEAR + 1;
  404. }
  405. inline bool ValidateMonth(ui8 month) {
  406. return month >= 1 && month <= 12;
  407. }
  408. inline bool ValidateDay(ui8 day) {
  409. return day >= 1 && day <= 31;
  410. }
  411. inline bool ValidateHour(ui8 hour) {
  412. return hour < 24;
  413. }
  414. inline bool ValidateMinute(ui8 minute) {
  415. return minute < 60;
  416. }
  417. inline bool ValidateSecond(ui8 second) {
  418. return second < 60;
  419. }
  420. inline bool ValidateMicrosecond(ui32 microsecond) {
  421. return microsecond < 1000000;
  422. }
  423. inline bool ValidateTimezoneId(ui16 timezoneId) {
  424. const auto& zones = NUdf::GetTimezones();
  425. return timezoneId < zones.size() && !zones[timezoneId].empty();
  426. }
  427. inline bool ValidateMonthShortName(const std::string_view& monthName, ui8& month) {
  428. static constexpr auto cmp = [](const std::string_view& a, const std::string_view& b) {
  429. int cmp = strnicmp(a.data(), b.data(), std::min(a.size(), b.size()));
  430. if (cmp == 0)
  431. return a.size() < b.size();
  432. return cmp < 0;
  433. };
  434. static const std::map<std::string_view, ui8, decltype(cmp)> mp = {
  435. {"jan", 1},
  436. {"feb", 2},
  437. {"mar", 3},
  438. {"apr", 4},
  439. {"may", 5},
  440. {"jun", 6},
  441. {"jul", 7},
  442. {"aug", 8},
  443. {"sep", 9},
  444. {"oct", 10},
  445. {"nov", 11},
  446. {"dec", 12}
  447. };
  448. const auto& it = mp.find(monthName);
  449. if (it != mp.end()) {
  450. month = it -> second;
  451. return true;
  452. }
  453. return false;
  454. }
  455. inline bool ValidateMonthFullName(const std::string_view& monthName, ui8& month) {
  456. static constexpr auto cmp = [](const std::string_view& a, const std::string_view& b) {
  457. int cmp = strnicmp(a.data(), b.data(), std::min(a.size(), b.size()));
  458. if (cmp == 0)
  459. return a.size() < b.size();
  460. return cmp < 0;
  461. };
  462. static const std::map<std::string_view, ui8, decltype(cmp)> mp = {
  463. {"january", 1},
  464. {"february", 2},
  465. {"march", 3},
  466. {"april", 4},
  467. {"may", 5},
  468. {"june", 6},
  469. {"july", 7},
  470. {"august", 8},
  471. {"september", 9},
  472. {"october", 10},
  473. {"november", 11},
  474. {"december", 12}
  475. };
  476. const auto& it = mp.find(monthName);
  477. if (it != mp.end()) {
  478. month = it -> second;
  479. return true;
  480. }
  481. return false;
  482. }
  483. template<typename TType>
  484. inline bool Validate(typename TDataType<TType>::TLayout arg);
  485. template<>
  486. inline bool Validate<TTimestamp>(ui64 timestamp) {
  487. return timestamp < MAX_TIMESTAMP;
  488. }
  489. template<>
  490. inline bool Validate<TTimestamp64>(i64 timestamp) {
  491. return timestamp >= MIN_TIMESTAMP64 && timestamp <= MAX_TIMESTAMP64;
  492. }
  493. template<>
  494. inline bool Validate<TInterval>(i64 interval) {
  495. return interval > -i64(MAX_TIMESTAMP) && interval < i64(MAX_TIMESTAMP);
  496. }
  497. template<>
  498. inline bool Validate<TInterval64>(i64 interval) {
  499. return interval >= -MAX_INTERVAL64 && interval <= MAX_INTERVAL64;
  500. }
  501. // Split
  502. template<typename TUserDataType, bool Nullable>
  503. using TSplitArgReader = std::conditional_t<TTzDataType<TUserDataType>::Result,
  504. TTzDateBlockReader<TUserDataType, Nullable>,
  505. TFixedSizeBlockReader<typename TDataType<TUserDataType>::TLayout, Nullable>>;
  506. template<typename TUserDataType>
  507. struct TSplitKernelExec : TUnaryKernelExec<TSplitKernelExec<TUserDataType>, TSplitArgReader<TUserDataType, false>, TResourceArrayBuilder<false>> {
  508. static void Split(TBlockItem arg, TTMStorage& storage, const IValueBuilder& valueBuilder);
  509. template<typename TSink>
  510. static void Process(const IValueBuilder* valueBuilder, TBlockItem arg, const TSink& sink) {
  511. try {
  512. TBlockItem res {0};
  513. Split(arg, Reference(res), *valueBuilder);
  514. sink(res);
  515. } catch (const std::exception& e) {
  516. UdfTerminate((TStringBuilder() << e.what()).data());
  517. }
  518. }
  519. };
  520. template <typename TUserDataType>
  521. class TSplit : public TBoxedValue {
  522. const TSourcePosition Pos_;
  523. public:
  524. explicit TSplit(TSourcePosition pos)
  525. : Pos_(pos)
  526. {}
  527. TUnboxedValue Run(
  528. const IValueBuilder* valueBuilder,
  529. const TUnboxedValuePod* args) const override;
  530. static bool DeclareSignature(
  531. TStringRef name,
  532. TType* userType,
  533. IFunctionTypeInfoBuilder& builder,
  534. bool typesOnly)
  535. {
  536. const auto typeInfoHelper = builder.TypeInfoHelper();
  537. TTupleTypeInspector tuple(*typeInfoHelper, userType);
  538. Y_ENSURE(tuple);
  539. Y_ENSURE(tuple.GetElementsCount() > 0);
  540. TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0));
  541. Y_ENSURE(argsTuple);
  542. if (argsTuple.GetElementsCount() != 1) {
  543. builder.SetError("Expected one argument");
  544. return true;
  545. }
  546. auto argType = argsTuple.GetElementType(0);
  547. builder.UserType(userType);
  548. builder.SupportsBlocks();
  549. builder.IsStrict();
  550. TBlockTypeInspector block(*typeInfoHelper, argType);
  551. if (block) {
  552. const auto* blockArgType = builder.Block(false)->Item<TUserDataType>().Build();
  553. builder.Args()->Add(blockArgType).Flags(ICallablePayload::TArgumentFlags::AutoMap);
  554. const auto* retType = builder.Resource(TMResourceName);
  555. const auto* blockRetType = builder.Block(false)->Item(retType).Build();
  556. builder.Returns(blockRetType);
  557. if (!typesOnly) {
  558. builder.Implementation(new TSimpleArrowUdfImpl({blockArgType}, retType, block.IsScalar(),
  559. TSplitKernelExec<TUserDataType>::Do, builder, TString(name), arrow::compute::NullHandling::COMPUTED_NO_PREALLOCATE));
  560. }
  561. } else {
  562. builder.Args()->Add<TUserDataType>().Flags(ICallablePayload::TArgumentFlags::AutoMap);
  563. if constexpr (NUdf::TDataType<TUserDataType>::Features & NYql::NUdf::BigDateType) {
  564. builder.Returns(builder.Resource(TM64ResourceName));
  565. } else {
  566. builder.Returns(builder.Resource(TMResourceName));
  567. }
  568. if (!typesOnly) {
  569. builder.Implementation(new TSplit<TUserDataType>(builder.GetSourcePosition()));
  570. }
  571. }
  572. return true;
  573. }
  574. };
  575. template <>
  576. void TSplitKernelExec<TDate>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
  577. storage.FromDate(builder.GetDateBuilder(), arg.Get<ui16>());
  578. }
  579. template <>
  580. void TSplitKernelExec<TDatetime>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
  581. storage.FromDatetime(builder.GetDateBuilder(), arg.Get<ui32>());
  582. }
  583. template <>
  584. void TSplitKernelExec<TTimestamp>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
  585. storage.FromTimestamp(builder.GetDateBuilder(), arg.Get<ui64>());
  586. }
  587. template <>
  588. void TSplitKernelExec<TTzDate>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
  589. storage.FromDate(builder.GetDateBuilder(), arg.Get<ui16>(), arg.GetTimezoneId());
  590. }
  591. template <>
  592. void TSplitKernelExec<TTzDatetime>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
  593. storage.FromDatetime(builder.GetDateBuilder(), arg.Get<ui32>(), arg.GetTimezoneId());
  594. }
  595. template <>
  596. void TSplitKernelExec<TTzTimestamp>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
  597. storage.FromTimestamp(builder.GetDateBuilder(), arg.Get<ui64>(), arg.GetTimezoneId());
  598. }
  599. template <>
  600. void TSplitKernelExec<TDate32>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) {
  601. ythrow yexception() << "Not implemented";
  602. }
  603. template <>
  604. void TSplitKernelExec<TDatetime64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) {
  605. ythrow yexception() << "Not implemented";
  606. }
  607. template <>
  608. void TSplitKernelExec<TTimestamp64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) {
  609. ythrow yexception() << "Not implemented";
  610. }
  611. template <>
  612. TUnboxedValue TSplit<TDate>::Run(
  613. const IValueBuilder* valueBuilder,
  614. const TUnboxedValuePod* args) const
  615. {
  616. try {
  617. EMPTY_RESULT_ON_EMPTY_ARG(0);
  618. auto& builder = valueBuilder->GetDateBuilder();
  619. TUnboxedValuePod result(0);
  620. auto& storage = Reference(result);
  621. storage.FromDate(builder, args[0].Get<ui16>());
  622. return result;
  623. } catch (const std::exception& e) {
  624. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  625. }
  626. }
  627. template <>
  628. TUnboxedValue TSplit<TDate32>::Run(
  629. const IValueBuilder* valueBuilder,
  630. const TUnboxedValuePod* args) const
  631. {
  632. try {
  633. EMPTY_RESULT_ON_EMPTY_ARG(0);
  634. TUnboxedValuePod result(0);
  635. auto& storage = Reference64(result);
  636. storage.FromDate32(valueBuilder->GetDateBuilder(), args[0].Get<i32>());
  637. return result;
  638. } catch (const std::exception& e) {
  639. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  640. }
  641. }
  642. template <>
  643. TUnboxedValue TSplit<TDatetime>::Run(
  644. const IValueBuilder* valueBuilder,
  645. const TUnboxedValuePod* args) const
  646. {
  647. try {
  648. EMPTY_RESULT_ON_EMPTY_ARG(0);
  649. auto& builder = valueBuilder->GetDateBuilder();
  650. TUnboxedValuePod result(0);
  651. auto& storage = Reference(result);
  652. storage.FromDatetime(builder, args[0].Get<ui32>());
  653. return result;
  654. } catch (const std::exception& e) {
  655. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  656. }
  657. }
  658. template <>
  659. TUnboxedValue TSplit<TDatetime64>::Run(
  660. const IValueBuilder* valueBuilder,
  661. const TUnboxedValuePod* args) const
  662. {
  663. try {
  664. EMPTY_RESULT_ON_EMPTY_ARG(0);
  665. TUnboxedValuePod result(0);
  666. auto& storage = Reference64(result);
  667. storage.FromDatetime64(valueBuilder->GetDateBuilder(), args[0].Get<i64>());
  668. return result;
  669. } catch (const std::exception& e) {
  670. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  671. }
  672. }
  673. template <>
  674. TUnboxedValue TSplit<TTimestamp>::Run(
  675. const IValueBuilder* valueBuilder,
  676. const TUnboxedValuePod* args) const
  677. {
  678. try {
  679. EMPTY_RESULT_ON_EMPTY_ARG(0);
  680. auto& builder = valueBuilder->GetDateBuilder();
  681. TUnboxedValuePod result(0);
  682. auto& storage = Reference(result);
  683. storage.FromTimestamp(builder, args[0].Get<ui64>());
  684. return result;
  685. } catch (const std::exception& e) {
  686. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  687. }
  688. }
  689. template <>
  690. TUnboxedValue TSplit<TTimestamp64>::Run(
  691. const IValueBuilder* valueBuilder,
  692. const TUnboxedValuePod* args) const
  693. {
  694. try {
  695. EMPTY_RESULT_ON_EMPTY_ARG(0);
  696. TUnboxedValuePod result(0);
  697. auto& storage = Reference64(result);
  698. storage.FromTimestamp64(valueBuilder->GetDateBuilder(), args[0].Get<i64>());
  699. return result;
  700. } catch (const std::exception& e) {
  701. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  702. }
  703. }
  704. template <>
  705. TUnboxedValue TSplit<TTzDate>::Run(
  706. const IValueBuilder* valueBuilder,
  707. const TUnboxedValuePod* args) const
  708. {
  709. try {
  710. EMPTY_RESULT_ON_EMPTY_ARG(0);
  711. auto& builder = valueBuilder->GetDateBuilder();
  712. TUnboxedValuePod result(0);
  713. auto& storage = Reference(result);
  714. storage.FromDate(builder, args[0].Get<ui16>(), args[0].GetTimezoneId());
  715. return result;
  716. } catch (const std::exception& e) {
  717. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  718. }
  719. }
  720. template <>
  721. TUnboxedValue TSplit<TTzDatetime>::Run(
  722. const IValueBuilder* valueBuilder,
  723. const TUnboxedValuePod* args) const
  724. {
  725. try {
  726. EMPTY_RESULT_ON_EMPTY_ARG(0);
  727. auto& builder = valueBuilder->GetDateBuilder();
  728. TUnboxedValuePod result(0);
  729. auto& storage = Reference(result);
  730. storage.FromDatetime(builder, args[0].Get<ui32>(), args[0].GetTimezoneId());
  731. return result;
  732. } catch (const std::exception& e) {
  733. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  734. }
  735. }
  736. template <>
  737. TUnboxedValue TSplit<TTzTimestamp>::Run(
  738. const IValueBuilder* valueBuilder,
  739. const TUnboxedValuePod* args) const
  740. {
  741. try {
  742. EMPTY_RESULT_ON_EMPTY_ARG(0);
  743. auto& builder = valueBuilder->GetDateBuilder();
  744. TUnboxedValuePod result(0);
  745. auto& storage = Reference(result);
  746. storage.FromTimestamp(builder, args[0].Get<ui64>(), args[0].GetTimezoneId());
  747. return result;
  748. } catch (const std::exception& e) {
  749. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  750. }
  751. }
  752. // Make*
  753. template<typename TUserDataType, bool Nullable>
  754. using TMakeResBuilder = std::conditional_t<TTzDataType<TUserDataType>::Result,
  755. TTzDateArrayBuilder<TUserDataType, Nullable>,
  756. TFixedSizeArrayBuilder<typename TDataType<TUserDataType>::TLayout, Nullable>>;
  757. template<typename TUserDataType>
  758. struct TMakeDateKernelExec : TUnaryKernelExec<TMakeDateKernelExec<TUserDataType>, TReaderTraits::TResource<false>, TMakeResBuilder<TUserDataType, false>> {
  759. static TBlockItem Make(TTMStorage& storage, const IValueBuilder& valueBuilder);
  760. template<typename TSink>
  761. static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
  762. auto& storage = Reference(item);
  763. sink(TBlockItem(Make(storage, *valueBuilder)));
  764. }
  765. };
  766. template<> TBlockItem TMakeDateKernelExec<TDate>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
  767. TBlockItem res(storage.ToDate(valueBuilder.GetDateBuilder(), /*local*/ false));
  768. return res;
  769. }
  770. template<> TBlockItem TMakeDateKernelExec<TDatetime>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
  771. TBlockItem res(storage.ToDatetime(valueBuilder.GetDateBuilder()));
  772. return res;
  773. }
  774. template<> TBlockItem TMakeDateKernelExec<TTimestamp>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
  775. TBlockItem res(storage.ToTimestamp(valueBuilder.GetDateBuilder()));
  776. return res;
  777. }
  778. template<> TBlockItem TMakeDateKernelExec<TTzDate>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
  779. TBlockItem res(storage.ToDate(valueBuilder.GetDateBuilder(), /*local*/ true));
  780. res.SetTimezoneId(storage.TimezoneId);
  781. return res;
  782. }
  783. template<> TBlockItem TMakeDateKernelExec<TTzDatetime>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
  784. TBlockItem res(storage.ToDatetime(valueBuilder.GetDateBuilder()));
  785. res.SetTimezoneId(storage.TimezoneId);
  786. return res;
  787. }
  788. template<> TBlockItem TMakeDateKernelExec<TTzTimestamp>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
  789. TBlockItem res(storage.ToTimestamp(valueBuilder.GetDateBuilder()));
  790. res.SetTimezoneId(storage.TimezoneId);
  791. return res;
  792. }
  793. BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeDate, TDate(TAutoMap<TResource<TMResourceName>>)) {
  794. auto& builder = valueBuilder->GetDateBuilder();
  795. auto& storage = Reference(args[0]);
  796. return TUnboxedValuePod(storage.ToDate(builder, false));
  797. }
  798. END_SIMPLE_ARROW_UDF(TMakeDate, TMakeDateKernelExec<TDate>::Do);
  799. BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeDatetime, TDatetime(TAutoMap<TResource<TMResourceName>>)) {
  800. auto& builder = valueBuilder->GetDateBuilder();
  801. auto& storage = Reference(args[0]);
  802. return TUnboxedValuePod(storage.ToDatetime(builder));
  803. }
  804. END_SIMPLE_ARROW_UDF(TMakeDatetime, TMakeDateKernelExec<TDatetime>::Do);
  805. BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTimestamp, TTimestamp(TAutoMap<TResource<TMResourceName>>)) {
  806. auto& builder = valueBuilder->GetDateBuilder();
  807. auto& storage = Reference(args[0]);
  808. return TUnboxedValuePod(storage.ToTimestamp(builder));
  809. }
  810. END_SIMPLE_ARROW_UDF(TMakeTimestamp, TMakeDateKernelExec<TTimestamp>::Do);
  811. BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzDate, TTzDate(TAutoMap<TResource<TMResourceName>>)) {
  812. auto& builder = valueBuilder->GetDateBuilder();
  813. auto& storage = Reference(args[0]);
  814. try {
  815. TUnboxedValuePod result(storage.ToDate(builder, true));
  816. result.SetTimezoneId(storage.TimezoneId);
  817. return result;
  818. } catch (const std::exception& e) {
  819. UdfTerminate((TStringBuilder() << Pos_ << "Timestamp "
  820. << storage.ToString()
  821. << " cannot be casted to TzDate"
  822. ).data());
  823. }
  824. }
  825. END_SIMPLE_ARROW_UDF(TMakeTzDate, TMakeDateKernelExec<TTzDate>::Do);
  826. BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzDatetime, TTzDatetime(TAutoMap<TResource<TMResourceName>>)) {
  827. auto& builder = valueBuilder->GetDateBuilder();
  828. auto& storage = Reference(args[0]);
  829. TUnboxedValuePod result(storage.ToDatetime(builder));
  830. result.SetTimezoneId(storage.TimezoneId);
  831. return result;
  832. }
  833. END_SIMPLE_ARROW_UDF(TMakeTzDatetime, TMakeDateKernelExec<TTzDatetime>::Do);
  834. BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzTimestamp, TTzTimestamp(TAutoMap<TResource<TMResourceName>>)) {
  835. auto& builder = valueBuilder->GetDateBuilder();
  836. auto& storage = Reference(args[0]);
  837. TUnboxedValuePod result(storage.ToTimestamp(builder));
  838. result.SetTimezoneId(storage.TimezoneId);
  839. return result;
  840. }
  841. END_SIMPLE_ARROW_UDF(TMakeTzTimestamp, TMakeDateKernelExec<TTzTimestamp>::Do);
  842. SIMPLE_STRICT_UDF(TConvert, TResource<TM64ResourceName>(TAutoMap<TResource<TMResourceName>>)) {
  843. Y_UNUSED(valueBuilder);
  844. TUnboxedValuePod result(0);
  845. auto& arg = Reference(args[0]);
  846. auto& storage = Reference64(result);
  847. storage.From(arg);
  848. return result;
  849. }
  850. SIMPLE_STRICT_UDF(TMakeDate32, TDate32(TAutoMap<TResource<TM64ResourceName>>)) {
  851. auto& storage = Reference64(args[0]);
  852. return TUnboxedValuePod(storage.ToDate32(valueBuilder->GetDateBuilder()));
  853. }
  854. SIMPLE_STRICT_UDF(TMakeDatetime64, TDatetime64(TAutoMap<TResource<TM64ResourceName>>)) {
  855. auto& storage = Reference64(args[0]);
  856. return TUnboxedValuePod(storage.ToDatetime64(valueBuilder->GetDateBuilder()));
  857. }
  858. SIMPLE_STRICT_UDF(TMakeTimestamp64, TTimestamp64(TAutoMap<TResource<TM64ResourceName>>)) {
  859. auto& storage = Reference64(args[0]);
  860. return TUnboxedValuePod(storage.ToTimestamp64(valueBuilder->GetDateBuilder()));
  861. }
  862. // Get*
  863. // #define GET_METHOD(field, type) \
  864. // struct TGet##field##KernelExec : TUnaryKernelExec<TGet##field##KernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<type, false>> { \
  865. // template<typename TSink> \
  866. // static void Process(TBlockItem item, const IValueBuilder& valueBuilder, const TSink& sink) { \
  867. // Y_UNUSED(valueBuilder); \
  868. // sink(TBlockItem(Get##field(item))); \
  869. // } \
  870. // }; \
  871. // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGet##field, type(TAutoMap<TResource<TMResourceName>>)) { \
  872. // Y_UNUSED(valueBuilder); \
  873. // return TUnboxedValuePod(Get##field(args[0])); \
  874. // } \
  875. // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGet##field, TGet##field##KernelExec::Do, arrow::compute::NullHandling::INTERSECTION);
  876. template<const char* TUdfName,
  877. typename TResultType, TResultType (*Accessor)(const TUnboxedValuePod&),
  878. typename TResultWType, TResultWType (*WAccessor)(const TUnboxedValuePod&)>
  879. class TGetDateComponent: public ::NYql::NUdf::TBoxedValue {
  880. public:
  881. typedef bool TTypeAwareMarker;
  882. static const ::NYql::NUdf::TStringRef& Name() {
  883. static auto name = TStringRef(TUdfName, std::strlen(TUdfName));
  884. return name;
  885. }
  886. static bool DeclareSignature(
  887. const ::NYql::NUdf::TStringRef& name,
  888. ::NYql::NUdf::TType* userType,
  889. ::NYql::NUdf::IFunctionTypeInfoBuilder& builder,
  890. bool typesOnly)
  891. {
  892. if (Name() != name) {
  893. return false;
  894. }
  895. if (!userType) {
  896. builder.SetError("User type is missing");
  897. return true;
  898. }
  899. builder.UserType(userType);
  900. const auto typeInfoHelper = builder.TypeInfoHelper();
  901. TTupleTypeInspector tuple(*typeInfoHelper, userType);
  902. Y_ENSURE(tuple, "Tuple with args and options tuples expected");
  903. Y_ENSURE(tuple.GetElementsCount() > 0,
  904. "Tuple has to contain positional arguments");
  905. TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0));
  906. Y_ENSURE(argsTuple, "Tuple with args expected");
  907. if (argsTuple.GetElementsCount() != 1) {
  908. builder.SetError("Single argument expected");
  909. return true;
  910. }
  911. auto argType = argsTuple.GetElementType(0);
  912. if (const auto optType = TOptionalTypeInspector(*typeInfoHelper, argType)) {
  913. argType = optType.GetItemType();
  914. }
  915. TResourceTypeInspector resource(*typeInfoHelper, argType);
  916. if (!resource) {
  917. TDataTypeInspector data(*typeInfoHelper, argType);
  918. if (!data) {
  919. builder.SetError("Data type expected");
  920. return true;
  921. }
  922. const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features;
  923. if (features & NUdf::BigDateType) {
  924. BuildSignature<TResultWType, TM64ResourceName, WAccessor>(builder, typesOnly);
  925. return true;
  926. }
  927. if (features & (NUdf::DateType | NUdf::TzDateType)) {
  928. BuildSignature<TResultType, TMResourceName, Accessor>(builder, typesOnly);
  929. return true;
  930. }
  931. ::TStringBuilder sb;
  932. sb << "Invalid argument type: got ";
  933. TTypePrinter(*typeInfoHelper, argType).Out(sb.Out);
  934. sb << ", but Resource<" << TMResourceName <<"> or Resource<"
  935. << TM64ResourceName << "> expected";
  936. builder.SetError(sb);
  937. return true;
  938. }
  939. if (resource.GetTag() == TStringRef::Of(TM64ResourceName)) {
  940. BuildSignature<TResultWType, TM64ResourceName, WAccessor>(builder, typesOnly);
  941. return true;
  942. }
  943. if (resource.GetTag() == TStringRef::Of(TMResourceName)) {
  944. BuildSignature<TResultType, TMResourceName, Accessor>(builder, typesOnly);
  945. return true;
  946. }
  947. builder.SetError("Unexpected Resource tag");
  948. return true;
  949. }
  950. private:
  951. template<typename TResult, TResult (*Func)(const TUnboxedValuePod&)>
  952. class TImpl : public TBoxedValue {
  953. public:
  954. TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final {
  955. Y_UNUSED(valueBuilder);
  956. EMPTY_RESULT_ON_EMPTY_ARG(0);
  957. return TUnboxedValuePod(TResult(Func(args[0])));
  958. }
  959. };
  960. template<typename TResult, const char* TResourceName, TResult (*Func)(const TUnboxedValuePod&)>
  961. static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) {
  962. builder.Returns<TResult>();
  963. builder.Args()->Add<TAutoMap<TResource<TResourceName>>>();
  964. builder.IsStrict();
  965. if (!typesOnly) {
  966. builder.Implementation(new TImpl<TResult, Func>());
  967. }
  968. }
  969. };
  970. // TODO: Merge this with <TGetDateComponent> class.
  971. template<const char* TUdfName, auto Accessor, auto WAccessor>
  972. class TGetDateComponentName: public ::NYql::NUdf::TBoxedValue {
  973. public:
  974. typedef bool TTypeAwareMarker;
  975. static const ::NYql::NUdf::TStringRef& Name() {
  976. static auto name = TStringRef(TUdfName, std::strlen(TUdfName));
  977. return name;
  978. }
  979. static bool DeclareSignature(
  980. const ::NYql::NUdf::TStringRef& name,
  981. ::NYql::NUdf::TType* userType,
  982. ::NYql::NUdf::IFunctionTypeInfoBuilder& builder,
  983. bool typesOnly)
  984. {
  985. if (Name() != name) {
  986. return false;
  987. }
  988. if (!userType) {
  989. builder.SetError("User type is missing");
  990. return true;
  991. }
  992. builder.UserType(userType);
  993. const auto typeInfoHelper = builder.TypeInfoHelper();
  994. TTupleTypeInspector tuple(*typeInfoHelper, userType);
  995. Y_ENSURE(tuple, "Tuple with args and options tuples expected");
  996. Y_ENSURE(tuple.GetElementsCount() > 0,
  997. "Tuple has to contain positional arguments");
  998. TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0));
  999. Y_ENSURE(argsTuple, "Tuple with args expected");
  1000. if (argsTuple.GetElementsCount() != 1) {
  1001. builder.SetError("Single argument expected");
  1002. return true;
  1003. }
  1004. auto argType = argsTuple.GetElementType(0);
  1005. if (const auto optType = TOptionalTypeInspector(*typeInfoHelper, argType)) {
  1006. argType = optType.GetItemType();
  1007. }
  1008. TResourceTypeInspector resource(*typeInfoHelper, argType);
  1009. if (!resource) {
  1010. TDataTypeInspector data(*typeInfoHelper, argType);
  1011. if (!data) {
  1012. builder.SetError("Data type expected");
  1013. return true;
  1014. }
  1015. const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features;
  1016. if (features & NUdf::BigDateType) {
  1017. BuildSignature<TM64ResourceName, WAccessor>(builder, typesOnly);
  1018. return true;
  1019. }
  1020. if (features & (NUdf::DateType | NUdf::TzDateType)) {
  1021. BuildSignature<TMResourceName, Accessor>(builder, typesOnly);
  1022. return true;
  1023. }
  1024. ::TStringBuilder sb;
  1025. sb << "Invalid argument type: got ";
  1026. TTypePrinter(*typeInfoHelper, argType).Out(sb.Out);
  1027. sb << ", but Resource<" << TMResourceName <<"> or Resource<"
  1028. << TM64ResourceName << "> expected";
  1029. builder.SetError(sb);
  1030. return true;
  1031. }
  1032. if (resource.GetTag() == TStringRef::Of(TM64ResourceName)) {
  1033. BuildSignature<TM64ResourceName, WAccessor>(builder, typesOnly);
  1034. return true;
  1035. }
  1036. if (resource.GetTag() == TStringRef::Of(TMResourceName)) {
  1037. BuildSignature<TMResourceName, Accessor>(builder, typesOnly);
  1038. return true;
  1039. }
  1040. builder.SetError("Unexpected Resource tag");
  1041. return true;
  1042. }
  1043. private:
  1044. template<auto Func>
  1045. class TImpl : public TBoxedValue {
  1046. public:
  1047. TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final {
  1048. EMPTY_RESULT_ON_EMPTY_ARG(0);
  1049. return Func(valueBuilder, args[0]);
  1050. }
  1051. };
  1052. template<const char* TResourceName, auto Func>
  1053. static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) {
  1054. builder.Returns<char*>();
  1055. builder.Args()->Add<TAutoMap<TResource<TResourceName>>>();
  1056. builder.IsStrict();
  1057. if (!typesOnly) {
  1058. builder.Implementation(new TImpl<Func>());
  1059. }
  1060. }
  1061. };
  1062. // template<typename TValue>
  1063. // TValue GetMonthNameValue(size_t idx) {
  1064. // static const std::array<TValue, 12U> monthNames = {{
  1065. // TValue::Embedded(TStringRef::Of("January")),
  1066. // TValue::Embedded(TStringRef::Of("February")),
  1067. // TValue::Embedded(TStringRef::Of("March")),
  1068. // TValue::Embedded(TStringRef::Of("April")),
  1069. // TValue::Embedded(TStringRef::Of("May")),
  1070. // TValue::Embedded(TStringRef::Of("June")),
  1071. // TValue::Embedded(TStringRef::Of("July")),
  1072. // TValue::Embedded(TStringRef::Of("August")),
  1073. // TValue::Embedded(TStringRef::Of("September")),
  1074. // TValue::Embedded(TStringRef::Of("October")),
  1075. // TValue::Embedded(TStringRef::Of("November")),
  1076. // TValue::Embedded(TStringRef::Of("December"))
  1077. // }};
  1078. // return monthNames.at(idx);
  1079. // }
  1080. // struct TGetMonthNameKernelExec : TUnaryKernelExec<TGetMonthNameKernelExec, TReaderTraits::TResource<true>, TStringArrayBuilder<arrow::StringType, false>> {
  1081. // template<typename TSink>
  1082. // static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
  1083. // Y_UNUSED(valueBuilder);
  1084. // sink(GetMonthNameValue<TBlockItem>(GetMonth(item) - 1U));
  1085. // }
  1086. // };
  1087. // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetMonthName, char*(TAutoMap<TResource<TMResourceName>>)) {
  1088. // Y_UNUSED(valueBuilder);
  1089. // return GetMonthNameValue<TUnboxedValue>(GetMonth(*args) - 1U);
  1090. // }
  1091. // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetMonthName, TGetMonthNameKernelExec::Do, arrow::compute::NullHandling::INTERSECTION);
  1092. template<const char* TResourceName>
  1093. TUnboxedValue GetMonthName(const IValueBuilder* valueBuilder, const TUnboxedValuePod& arg) {
  1094. Y_UNUSED(valueBuilder);
  1095. static const std::array<TUnboxedValue, 12U> monthNames = {{
  1096. TUnboxedValuePod::Embedded(TStringRef::Of("January")),
  1097. TUnboxedValuePod::Embedded(TStringRef::Of("February")),
  1098. TUnboxedValuePod::Embedded(TStringRef::Of("March")),
  1099. TUnboxedValuePod::Embedded(TStringRef::Of("April")),
  1100. TUnboxedValuePod::Embedded(TStringRef::Of("May")),
  1101. TUnboxedValuePod::Embedded(TStringRef::Of("June")),
  1102. TUnboxedValuePod::Embedded(TStringRef::Of("July")),
  1103. TUnboxedValuePod::Embedded(TStringRef::Of("August")),
  1104. TUnboxedValuePod::Embedded(TStringRef::Of("September")),
  1105. TUnboxedValuePod::Embedded(TStringRef::Of("October")),
  1106. TUnboxedValuePod::Embedded(TStringRef::Of("November")),
  1107. TUnboxedValuePod::Embedded(TStringRef::Of("December"))
  1108. }};
  1109. if constexpr (TResourceName == TMResourceName) {
  1110. return monthNames.at(GetMonth(arg) - 1U);
  1111. }
  1112. if constexpr (TResourceName == TM64ResourceName) {
  1113. return monthNames.at(GetWMonth(arg) - 1U);
  1114. }
  1115. Y_UNREACHABLE();
  1116. }
  1117. // struct TGetDayOfMonthKernelExec : TUnaryKernelExec<TGetMonthNameKernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<ui8, false>> {
  1118. // template<typename TSink>
  1119. // static void Process(TBlockItem item, const TSink& sink) {
  1120. // sink(GetDay(item));
  1121. // }
  1122. // };
  1123. // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetDayOfMonth, ui8(TAutoMap<TResource<TMResourceName>>)) {
  1124. // Y_UNUSED(valueBuilder);
  1125. // return TUnboxedValuePod(GetDay(args[0]));
  1126. // }
  1127. // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetDayOfMonth, TGetDayOfMonthKernelExec::Do, arrow::compute::NullHandling::INTERSECTION);
  1128. template<const char* TResourceName>
  1129. TUnboxedValue GetDayOfWeekName(const IValueBuilder* valueBuilder, const TUnboxedValuePod& arg) {
  1130. Y_UNUSED(valueBuilder);
  1131. static const std::array<TUnboxedValue, 7U> dayNames = {{
  1132. TUnboxedValuePod::Embedded(TStringRef::Of("Monday")),
  1133. TUnboxedValuePod::Embedded(TStringRef::Of("Tuesday")),
  1134. TUnboxedValuePod::Embedded(TStringRef::Of("Wednesday")),
  1135. TUnboxedValuePod::Embedded(TStringRef::Of("Thursday")),
  1136. TUnboxedValuePod::Embedded(TStringRef::Of("Friday")),
  1137. TUnboxedValuePod::Embedded(TStringRef::Of("Saturday")),
  1138. TUnboxedValuePod::Embedded(TStringRef::Of("Sunday"))
  1139. }};
  1140. if constexpr (TResourceName == TMResourceName) {
  1141. return dayNames.at(GetDayOfWeek(arg) - 1U);
  1142. }
  1143. if constexpr (TResourceName == TM64ResourceName) {
  1144. return dayNames.at(GetWDayOfWeek(arg) - 1U);
  1145. }
  1146. Y_UNREACHABLE();
  1147. }
  1148. // struct TGetDayOfWeekNameKernelExec : TUnaryKernelExec<TGetDayOfWeekNameKernelExec, TReaderTraits::TResource<true>, TStringArrayBuilder<arrow::StringType, false>> {
  1149. // template<typename TSink>
  1150. // static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
  1151. // Y_UNUSED(valueBuilder);
  1152. // sink(GetDayNameValue<TBlockItem>(GetDayOfWeek(item) - 1U));
  1153. // }
  1154. // };
  1155. // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetDayOfWeekName, char*(TAutoMap<TResource<TMResourceName>>)) {
  1156. // Y_UNUSED(valueBuilder);
  1157. // return GetDayNameValue<TUnboxedValuePod>(GetDayOfWeek(*args) - 1U);
  1158. // }
  1159. // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetDayOfWeekName, TGetDayOfWeekNameKernelExec::Do, arrow::compute::NullHandling::INTERSECTION);
  1160. struct TTGetTimezoneNameKernelExec : TUnaryKernelExec<TTGetTimezoneNameKernelExec, TReaderTraits::TResource<false>, TStringArrayBuilder<arrow::BinaryType, false>> {
  1161. template<typename TSink>
  1162. static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
  1163. Y_UNUSED(valueBuilder);
  1164. auto timezoneId = GetTimezoneId(item);
  1165. if (timezoneId >= NUdf::GetTimezones().size()) {
  1166. sink(TBlockItem{});
  1167. } else {
  1168. sink(TBlockItem{NUdf::GetTimezones()[timezoneId]});
  1169. }
  1170. }
  1171. };
  1172. BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetTimezoneName, char*(TAutoMap<TResource<TMResourceName>>)) {
  1173. auto timezoneId = GetTimezoneId(args[0]);
  1174. if (timezoneId >= NUdf::GetTimezones().size()) {
  1175. return TUnboxedValuePod();
  1176. }
  1177. return valueBuilder->NewString(NUdf::GetTimezones()[timezoneId]);
  1178. }
  1179. END_SIMPLE_ARROW_UDF(TGetTimezoneName, TTGetTimezoneNameKernelExec::Do);
  1180. template<const char* TResourceName>
  1181. TUnboxedValue GetTimezoneName(const IValueBuilder* valueBuilder, const TUnboxedValuePod& arg) {
  1182. ui16 tzId;
  1183. if constexpr (TResourceName == TMResourceName) {
  1184. tzId = GetTimezoneId(arg);
  1185. }
  1186. if constexpr (TResourceName == TM64ResourceName) {
  1187. tzId = GetWTimezoneId(arg);
  1188. }
  1189. const auto& tzNames = NUdf::GetTimezones();
  1190. if (tzId >= tzNames.size()) {
  1191. return TUnboxedValuePod();
  1192. }
  1193. return valueBuilder->NewString(tzNames[tzId]);
  1194. }
  1195. // Update
  1196. class TUpdate : public TBoxedValue {
  1197. const TSourcePosition Pos_;
  1198. public:
  1199. explicit TUpdate(TSourcePosition pos)
  1200. : Pos_(pos)
  1201. {}
  1202. TUnboxedValue Run(
  1203. const IValueBuilder* valueBuilder,
  1204. const TUnboxedValuePod* args) const override
  1205. {
  1206. try {
  1207. EMPTY_RESULT_ON_EMPTY_ARG(0);
  1208. auto result = args[0];
  1209. if (args[1]) {
  1210. auto year = args[1].Get<ui16>();
  1211. if (!ValidateYear(year)) {
  1212. return TUnboxedValuePod();
  1213. }
  1214. SetYear(result, year);
  1215. }
  1216. if (args[2]) {
  1217. auto month = args[2].Get<ui8>();
  1218. if (!ValidateMonth(month)) {
  1219. return TUnboxedValuePod();
  1220. }
  1221. SetMonth(result, month);
  1222. }
  1223. if (args[3]) {
  1224. auto day = args[3].Get<ui8>();
  1225. if (!ValidateDay(day)) {
  1226. return TUnboxedValuePod();
  1227. }
  1228. SetDay(result, day);
  1229. }
  1230. if (args[4]) {
  1231. auto hour = args[4].Get<ui8>();
  1232. if (!ValidateHour(hour)) {
  1233. return TUnboxedValuePod();
  1234. }
  1235. SetHour(result, hour);
  1236. }
  1237. if (args[5]) {
  1238. auto minute = args[5].Get<ui8>();
  1239. if (!ValidateMinute(minute)) {
  1240. return TUnboxedValuePod();
  1241. }
  1242. SetMinute(result, minute);
  1243. }
  1244. if (args[6]) {
  1245. auto second = args[6].Get<ui8>();
  1246. if (!ValidateSecond(second)) {
  1247. return TUnboxedValuePod();
  1248. }
  1249. SetSecond(result, second);
  1250. }
  1251. if (args[7]) {
  1252. auto microsecond = args[7].Get<ui32>();
  1253. if (!ValidateMicrosecond(microsecond)) {
  1254. return TUnboxedValuePod();
  1255. }
  1256. SetMicrosecond(result, microsecond);
  1257. }
  1258. if (args[8]) {
  1259. auto timezoneId = args[8].Get<ui16>();
  1260. if (!ValidateTimezoneId(timezoneId)) {
  1261. return TUnboxedValuePod();
  1262. }
  1263. SetTimezoneId(result, timezoneId);
  1264. }
  1265. auto& builder = valueBuilder->GetDateBuilder();
  1266. auto& storage = Reference(result);
  1267. if (!storage.Validate(builder)) {
  1268. return TUnboxedValuePod();
  1269. }
  1270. return result;
  1271. } catch (const std::exception& e) {
  1272. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  1273. }
  1274. }
  1275. static const TStringRef& Name() {
  1276. static auto name = TStringRef::Of("Update");
  1277. return name;
  1278. }
  1279. static bool DeclareSignature(
  1280. const TStringRef& name,
  1281. TType*,
  1282. IFunctionTypeInfoBuilder& builder,
  1283. bool typesOnly)
  1284. {
  1285. if (Name() != name) {
  1286. return false;
  1287. }
  1288. auto resourceType = builder.Resource(TMResourceName);
  1289. auto optionalResourceType = builder.Optional()->Item(resourceType).Build();
  1290. builder.OptionalArgs(8).Args()->Add(resourceType).Flags(ICallablePayload::TArgumentFlags::AutoMap)
  1291. .Add(builder.Optional()->Item<ui16>().Build()).Name("Year")
  1292. .Add(builder.Optional()->Item<ui8>().Build()).Name("Month")
  1293. .Add(builder.Optional()->Item<ui8>().Build()).Name("Day")
  1294. .Add(builder.Optional()->Item<ui8>().Build()).Name("Hour")
  1295. .Add(builder.Optional()->Item<ui8>().Build()).Name("Minute")
  1296. .Add(builder.Optional()->Item<ui8>().Build()).Name("Second")
  1297. .Add(builder.Optional()->Item<ui32>().Build()).Name("Microsecond")
  1298. .Add(builder.Optional()->Item<ui16>().Build()).Name("TimezoneId");
  1299. builder.Returns(optionalResourceType);
  1300. if (!typesOnly) {
  1301. builder.Implementation(new TUpdate(builder.GetSourcePosition()));
  1302. }
  1303. builder.IsStrict();
  1304. return true;
  1305. }
  1306. };
  1307. // From*
  1308. template<typename TInput, typename TOutput, i64 UsecMultiplier>
  1309. inline TUnboxedValuePod TFromConverter(TInput arg) {
  1310. using TLayout = TDataType<TOutput>::TLayout;
  1311. const TLayout usec = TLayout(arg) * UsecMultiplier;
  1312. return Validate<TOutput>(usec) ? TUnboxedValuePod(usec) : TUnboxedValuePod();
  1313. }
  1314. template<typename TInput, typename TOutput, i64 UsecMultiplier>
  1315. using TFromConverterKernel = TUnaryUnsafeFixedSizeFilterKernel<TInput,
  1316. typename TDataType<TOutput>::TLayout, [] (TInput arg) {
  1317. using TLayout = TDataType<TOutput>::TLayout;
  1318. const TLayout usec = TLayout(arg) * UsecMultiplier;
  1319. return std::make_pair(usec, Validate<TOutput>(usec));
  1320. }>;
  // Declares the UDF class T<udfName> with signature
  // Optional<resultType>(AutoMap<inputType>). The scalar path calls
  // TFromConverter, the block path uses TFromConverterKernel, both with the
  // given multiplier.
  #define DATETIME_FROM_CONVERTER_UDF(udfName, resultType, inputType, multiplier)                      \
      BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, TOptional<resultType>(TAutoMap<inputType>)) {          \
          Y_UNUSED(valueBuilder);                                                                      \
          return TFromConverter<inputType, resultType, multiplier>(args[0].Get<inputType>());          \
      }                                                                                                \
                                                                                                       \
      END_SIMPLE_ARROW_UDF(T##udfName, (TFromConverterKernel<inputType, resultType, multiplier>::Do))
  1328. DATETIME_FROM_CONVERTER_UDF(FromSeconds, TTimestamp, ui32, UsecondsInSecond);
  1329. DATETIME_FROM_CONVERTER_UDF(FromMilliseconds, TTimestamp, ui64, UsecondsInMilliseconds);
  1330. DATETIME_FROM_CONVERTER_UDF(FromMicroseconds, TTimestamp, ui64, 1);
  1331. DATETIME_FROM_CONVERTER_UDF(FromSeconds64, TTimestamp64, i64, UsecondsInSecond);
  1332. DATETIME_FROM_CONVERTER_UDF(FromMilliseconds64, TTimestamp64, i64, UsecondsInMilliseconds);
  1333. DATETIME_FROM_CONVERTER_UDF(FromMicroseconds64, TTimestamp64, i64, 1);
  1334. DATETIME_FROM_CONVERTER_UDF(IntervalFromDays, TInterval, i32, UsecondsInDay);
  1335. DATETIME_FROM_CONVERTER_UDF(IntervalFromHours, TInterval, i32, UsecondsInHour);
  1336. DATETIME_FROM_CONVERTER_UDF(IntervalFromMinutes, TInterval, i32, UsecondsInMinute);
  1337. DATETIME_FROM_CONVERTER_UDF(IntervalFromSeconds, TInterval, i32, UsecondsInSecond);
  1338. DATETIME_FROM_CONVERTER_UDF(IntervalFromMilliseconds, TInterval, i64, UsecondsInMilliseconds);
  1339. DATETIME_FROM_CONVERTER_UDF(IntervalFromMicroseconds, TInterval, i64, 1);
  1340. DATETIME_FROM_CONVERTER_UDF(Interval64FromDays, TInterval64, i32, UsecondsInDay);
  1341. DATETIME_FROM_CONVERTER_UDF(Interval64FromHours, TInterval64, i64, UsecondsInHour);
  1342. DATETIME_FROM_CONVERTER_UDF(Interval64FromMinutes, TInterval64, i64, UsecondsInMinute);
  1343. DATETIME_FROM_CONVERTER_UDF(Interval64FromSeconds, TInterval64, i64, UsecondsInSecond);
  1344. DATETIME_FROM_CONVERTER_UDF(Interval64FromMilliseconds, TInterval64, i64, UsecondsInMilliseconds);
  1345. DATETIME_FROM_CONVERTER_UDF(Interval64FromMicroseconds, TInterval64, i64, 1);
  1346. // To*
  1347. BEGIN_SIMPLE_STRICT_ARROW_UDF(TToDays, i32(TAutoMap<TInterval>)) {
  1348. Y_UNUSED(valueBuilder);
  1349. return TUnboxedValuePod(i32(args[0].Get<i64>() / UsecondsInDay));
  1350. }
  1351. END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TToDays,
  1352. (UnaryPreallocatedExecImpl<i64, i32, [] (i64 arg) { return i32(arg / UsecondsInDay); }>),
  1353. arrow::compute::NullHandling::INTERSECTION);
  1354. BEGIN_SIMPLE_STRICT_ARROW_UDF(TToHours, i32(TAutoMap<TInterval>)) {
  1355. Y_UNUSED(valueBuilder);
  1356. return TUnboxedValuePod(i32(args[0].Get<i64>() / UsecondsInHour));
  1357. }
  1358. END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TToHours,
  1359. (UnaryPreallocatedExecImpl<i64, i32, [] (i64 arg) { return i32(arg / UsecondsInHour); }>),
  1360. arrow::compute::NullHandling::INTERSECTION);
  1361. BEGIN_SIMPLE_STRICT_ARROW_UDF(TToMinutes, i32(TAutoMap<TInterval>)) {
  1362. Y_UNUSED(valueBuilder);
  1363. return TUnboxedValuePod(i32(args[0].Get<i64>() / UsecondsInMinute));
  1364. }
  1365. END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TToMinutes,
  1366. (UnaryPreallocatedExecImpl<i64, i32, [] (i64 arg) { return i32(arg / UsecondsInMinute); }>),
  1367. arrow::compute::NullHandling::INTERSECTION);
  1368. // StartOf*
  1369. template<auto Core>
  1370. struct TStartOfKernelExec : TUnaryKernelExec<TStartOfKernelExec<Core>, TResourceBlockReader<false>, TResourceArrayBuilder<true>> {
  1371. template<typename TSink>
  1372. static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
  1373. if (auto res = Core(Reference(item), *valueBuilder)) {
  1374. Reference(item) = res.GetRef();
  1375. sink(item);
  1376. } else {
  1377. sink(TBlockItem{});
  1378. }
  1379. }
  1380. };
  1381. template<auto Core>
  1382. TUnboxedValue SimpleDatetimeToDatetimeUdf(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) {
  1383. auto result = args[0];
  1384. auto& storage = Reference(result);
  1385. if (auto res = Core(storage, *valueBuilder)) {
  1386. storage = res.GetRef();
  1387. return result;
  1388. }
  1389. return TUnboxedValuePod{};
  1390. }
  1391. TMaybe<TTMStorage> StartOfYear(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1392. storage.Month = 1;
  1393. storage.Day = 1;
  1394. storage.Hour = 0;
  1395. storage.Minute = 0;
  1396. storage.Second = 0;
  1397. storage.Microsecond = 0;
  1398. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1399. return {};
  1400. }
  1401. return storage;
  1402. }
  1403. BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfYear, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
  1404. return SimpleDatetimeToDatetimeUdf<StartOfYear>(valueBuilder, args);
  1405. }
  1406. END_SIMPLE_ARROW_UDF(TStartOfYear, TStartOfKernelExec<StartOfYear>::Do);
  1407. void SetEndOfDay(TTMStorage& storage) {
  1408. storage.Hour = 23;
  1409. storage.Minute = 59;
  1410. storage.Second = 59;
  1411. storage.Microsecond = 999999;
  1412. }
  1413. TMaybe<TTMStorage> EndOfYear(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1414. storage.Month = 12;
  1415. storage.Day = 31;
  1416. SetEndOfDay(storage);
  1417. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1418. return {};
  1419. }
  1420. return storage;
  1421. }
  1422. BEGIN_SIMPLE_STRICT_ARROW_UDF(TEndOfYear, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
  1423. return SimpleDatetimeToDatetimeUdf<EndOfYear>(valueBuilder, args);
  1424. }
  1425. END_SIMPLE_ARROW_UDF(TEndOfYear, TStartOfKernelExec<EndOfYear>::Do);
  1426. TMaybe<TTMStorage> StartOfQuarter(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1427. storage.Month = (storage.Month - 1) / 3 * 3 + 1;
  1428. storage.Day = 1;
  1429. storage.Hour = 0;
  1430. storage.Minute = 0;
  1431. storage.Second = 0;
  1432. storage.Microsecond = 0;
  1433. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1434. return {};
  1435. }
  1436. return storage;
  1437. }
  1438. BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfQuarter, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
  1439. return SimpleDatetimeToDatetimeUdf<StartOfQuarter>(valueBuilder, args);
  1440. }
  1441. END_SIMPLE_ARROW_UDF(TStartOfQuarter, TStartOfKernelExec<StartOfQuarter>::Do);
  1442. TMaybe<TTMStorage> EndOfQuarter(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1443. storage.Month = ((storage.Month - 1) / 3 + 1) * 3;
  1444. storage.Day = NMiniKQL::GetMonthLength(storage.Month, NMiniKQL::IsLeapYear(storage.Year));
  1445. SetEndOfDay(storage);
  1446. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1447. return {};
  1448. }
  1449. return storage;
  1450. }
  1451. BEGIN_SIMPLE_STRICT_ARROW_UDF(TEndOfQuarter, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
  1452. return SimpleDatetimeToDatetimeUdf<EndOfQuarter>(valueBuilder, args);
  1453. }
  1454. END_SIMPLE_ARROW_UDF(TEndOfQuarter, TStartOfKernelExec<EndOfQuarter>::Do);
  1455. TMaybe<TTMStorage> StartOfMonth(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1456. storage.Day = 1;
  1457. storage.Hour = 0;
  1458. storage.Minute = 0;
  1459. storage.Second = 0;
  1460. storage.Microsecond = 0;
  1461. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1462. return {};
  1463. }
  1464. return storage;
  1465. }
  1466. BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfMonth, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
  1467. return SimpleDatetimeToDatetimeUdf<StartOfMonth>(valueBuilder, args);
  1468. }
  1469. END_SIMPLE_ARROW_UDF(TStartOfMonth, TStartOfKernelExec<StartOfMonth>::Do);
  1470. TMaybe<TTMStorage> EndOfMonth(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1471. storage.Day = NMiniKQL::GetMonthLength(storage.Month, NMiniKQL::IsLeapYear(storage.Year));
  1472. SetEndOfDay(storage);
  1473. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1474. return {};
  1475. }
  1476. return storage;
  1477. }
  1478. BEGIN_SIMPLE_STRICT_ARROW_UDF(TEndOfMonth, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
  1479. return SimpleDatetimeToDatetimeUdf<EndOfMonth>(valueBuilder, args);
  1480. }
  1481. END_SIMPLE_ARROW_UDF(TEndOfMonth, TStartOfKernelExec<EndOfMonth>::Do);
  1482. TMaybe<TTMStorage> StartOfWeek(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1483. const ui32 shift = 86400u * (storage.DayOfWeek - 1u);
  1484. if (shift > storage.ToDatetime(valueBuilder.GetDateBuilder())) {
  1485. return {};
  1486. }
  1487. storage.FromDatetime(valueBuilder.GetDateBuilder(), storage.ToDatetime(valueBuilder.GetDateBuilder()) - shift, storage.TimezoneId);
  1488. storage.Hour = 0;
  1489. storage.Minute = 0;
  1490. storage.Second = 0;
  1491. storage.Microsecond = 0;
  1492. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1493. return {};
  1494. }
  1495. return storage;
  1496. }
  1497. BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfWeek, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
  1498. return SimpleDatetimeToDatetimeUdf<StartOfWeek>(valueBuilder, args);
  1499. }
  1500. END_SIMPLE_ARROW_UDF(TStartOfWeek, TStartOfKernelExec<StartOfWeek>::Do);
  1501. TMaybe<TTMStorage> EndOfWeek(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1502. const ui32 shift = 86400u * (7u - storage.DayOfWeek);
  1503. auto dt = storage.ToDatetime(valueBuilder.GetDateBuilder());
  1504. if (NUdf::MAX_DATETIME - shift <= dt) {
  1505. return {};
  1506. }
  1507. storage.FromDatetime(valueBuilder.GetDateBuilder(), dt + shift, storage.TimezoneId);
  1508. SetEndOfDay(storage);
  1509. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1510. return {};
  1511. }
  1512. return storage;
  1513. }
  1514. BEGIN_SIMPLE_STRICT_ARROW_UDF(TEndOfWeek, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
  1515. return SimpleDatetimeToDatetimeUdf<EndOfWeek>(valueBuilder, args);
  1516. }
  1517. END_SIMPLE_ARROW_UDF(TEndOfWeek, TStartOfKernelExec<EndOfWeek>::Do);
  1518. TMaybe<TTMStorage> StartOfDay(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1519. storage.Hour = 0;
  1520. storage.Minute = 0;
  1521. storage.Second = 0;
  1522. storage.Microsecond = 0;
  1523. auto& builder = valueBuilder.GetDateBuilder();
  1524. if (!storage.Validate(builder)) {
  1525. return {};
  1526. }
  1527. return storage;
  1528. }
  1529. BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfDay, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
  1530. return SimpleDatetimeToDatetimeUdf<StartOfDay>(valueBuilder, args);
  1531. }
  1532. END_SIMPLE_ARROW_UDF(TStartOfDay, TStartOfKernelExec<StartOfDay>::Do);
  1533. TMaybe<TTMStorage> EndOfDay(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1534. SetEndOfDay(storage);
  1535. auto& builder = valueBuilder.GetDateBuilder();
  1536. if (!storage.Validate(builder)) {
  1537. return {};
  1538. }
  1539. return storage;
  1540. }
  1541. BEGIN_SIMPLE_STRICT_ARROW_UDF(TEndOfDay, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
  1542. return SimpleDatetimeToDatetimeUdf<EndOfDay>(valueBuilder, args);
  1543. }
  1544. END_SIMPLE_ARROW_UDF(TEndOfDay, TStartOfKernelExec<EndOfDay>::Do);
  1545. TMaybe<TTMStorage> StartOf(TTMStorage storage, ui64 interval, const IValueBuilder& valueBuilder) {
  1546. if (interval >= 86400000000ull) {
  1547. // treat as StartOfDay
  1548. storage.Hour = 0;
  1549. storage.Minute = 0;
  1550. storage.Second = 0;
  1551. storage.Microsecond = 0;
  1552. } else {
  1553. auto current = storage.ToTimeOfDay();
  1554. auto rounded = current / interval * interval;
  1555. storage.FromTimeOfDay(rounded);
  1556. }
  1557. auto& builder = valueBuilder.GetDateBuilder();
  1558. if (!storage.Validate(builder)) {
  1559. return {};
  1560. }
  1561. return storage;
  1562. }
  1563. TMaybe<TTMStorage> EndOf(TTMStorage storage, ui64 interval, const IValueBuilder& valueBuilder) {
  1564. if (interval >= 86400000000ull) {
  1565. // treat as EndOfDay
  1566. SetEndOfDay(storage);
  1567. } else {
  1568. auto current = storage.ToTimeOfDay();
  1569. auto rounded = current / interval * (interval + 1) - 1;
  1570. storage.FromTimeOfDay(rounded);
  1571. }
  1572. auto& builder = valueBuilder.GetDateBuilder();
  1573. if (!storage.Validate(builder)) {
  1574. return {};
  1575. }
  1576. return storage;
  1577. }
  1578. template<bool UseEnd>
  1579. struct TStartEndOfBinaryKernelExec : TBinaryKernelExec<TStartEndOfBinaryKernelExec<UseEnd>> {
  1580. template<typename TSink>
  1581. static void Process(const IValueBuilder* valueBuilder, TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
  1582. auto& storage = Reference(arg1);
  1583. ui64 interval = std::abs(arg2.Get<i64>());
  1584. if (interval == 0) {
  1585. sink(arg1);
  1586. return;
  1587. }
  1588. if (auto res = (UseEnd ? EndOf : StartOf)(storage, interval, *valueBuilder)) {
  1589. storage = res.GetRef();
  1590. sink(arg1);
  1591. } else {
  1592. sink(TBlockItem{});
  1593. }
  1594. }
  1595. };
  1596. BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOf, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>, TAutoMap<TInterval>)) {
  1597. auto result = args[0];
  1598. ui64 interval = std::abs(args[1].Get<i64>());
  1599. if (interval == 0) {
  1600. return result;
  1601. }
  1602. if (auto res = StartOf(Reference(result), interval, *valueBuilder)) {
  1603. Reference(result) = res.GetRef();
  1604. return result;
  1605. }
  1606. return TUnboxedValuePod{};
  1607. }
  1608. END_SIMPLE_ARROW_UDF(TStartOf, TStartEndOfBinaryKernelExec<false>::Do);
  1609. BEGIN_SIMPLE_STRICT_ARROW_UDF(TEndOf, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>, TAutoMap<TInterval>)) {
  1610. auto result = args[0];
  1611. ui64 interval = std::abs(args[1].Get<i64>());
  1612. if (interval == 0) {
  1613. return result;
  1614. }
  1615. if (auto res = EndOf(Reference(result), interval, *valueBuilder)) {
  1616. Reference(result) = res.GetRef();
  1617. return result;
  1618. }
  1619. return TUnboxedValuePod{};
  1620. }
  1621. END_SIMPLE_ARROW_UDF(TEndOf, TStartEndOfBinaryKernelExec<true>::Do);
  1622. struct TTimeOfDayKernelExec : TUnaryKernelExec<TTimeOfDayKernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<TDataType<TInterval>::TLayout, false>> {
  1623. template<typename TSink>
  1624. static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
  1625. Y_UNUSED(valueBuilder);
  1626. auto& storage = Reference(item);
  1627. sink(TBlockItem{(TDataType<TInterval>::TLayout)storage.ToTimeOfDay()});
  1628. }
  1629. };
  1630. const auto timeOfDayKernelExecDo = TTimeOfDayKernelExec::Do;
  1631. BEGIN_SIMPLE_STRICT_ARROW_UDF(TTimeOfDay, TInterval(TAutoMap<TResource<TMResourceName>>)) {
  1632. Y_UNUSED(valueBuilder);
  1633. auto& storage = Reference(args[0]);
  1634. return TUnboxedValuePod((i64)storage.ToTimeOfDay());
  1635. }
  1636. END_SIMPLE_ARROW_UDF(TTimeOfDay, timeOfDayKernelExecDo);
  1637. // Add ...
  1638. template<auto Core>
  1639. struct TAddKernelExec : TBinaryKernelExec<TAddKernelExec<Core>> {
  1640. template<typename TSink>
  1641. static void Process(const IValueBuilder* valueBuilder, TBlockItem date, TBlockItem arg, const TSink& sink) {
  1642. sink(Core(date, arg.Get<i32>(), valueBuilder->GetDateBuilder()));
  1643. }
  1644. };
  1645. BEGIN_SIMPLE_STRICT_ARROW_UDF(TShiftYears, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>, i32)) {
  1646. return DoAddYears(args[0], args[1].Get<i32>(), valueBuilder->GetDateBuilder());
  1647. }
  1648. END_SIMPLE_ARROW_UDF(TShiftYears, TAddKernelExec<DoAddYears<TBlockItem>>::Do);
  1649. BEGIN_SIMPLE_STRICT_ARROW_UDF(TShiftQuarters, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>, i32)) {
  1650. return DoAddQuarters(args[0], args[1].Get<i32>(), valueBuilder->GetDateBuilder());
  1651. }
  1652. END_SIMPLE_ARROW_UDF(TShiftQuarters, TAddKernelExec<DoAddQuarters<TBlockItem>>::Do);
  1653. BEGIN_SIMPLE_STRICT_ARROW_UDF(TShiftMonths, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>, i32)) {
  1654. return DoAddMonths(args[0], args[1].Get<i32>(), valueBuilder->GetDateBuilder());
  1655. }
  1656. END_SIMPLE_ARROW_UDF(TShiftMonths, TAddKernelExec<DoAddMonths<TBlockItem>>::Do);
  1657. template<size_t Digits, bool Exacly = true>
  1658. struct PrintNDigits;
  1659. template<bool Exacly>
  1660. struct PrintNDigits<0U, Exacly> {
  1661. static constexpr ui32 Miltiplier = 1U;
  1662. template <typename T>
  1663. static constexpr size_t Do(T, char*) { return 0U; }
  1664. };
  1665. template<size_t Digits, bool Exacly>
  1666. struct PrintNDigits {
  1667. using TNextPrint = PrintNDigits<Digits - 1U, Exacly>;
  1668. static constexpr ui32 Miltiplier = TNextPrint::Miltiplier * 10U;
  1669. template <typename T>
  1670. static constexpr size_t Do(T in, char* out) {
  1671. in %= Miltiplier;
  1672. if (Exacly || in) {
  1673. *out = "0123456789"[in / TNextPrint::Miltiplier];
  1674. return 1U + TNextPrint::Do(in, ++out);
  1675. }
  1676. return 0U;
  1677. }
  1678. };
  1679. // Format
  1680. class TFormat : public TBoxedValue {
  1681. public:
  1682. explicit TFormat(TSourcePosition pos)
  1683. : Pos_(pos)
  1684. {}
  1685. static const TStringRef& Name() {
  1686. static auto name = TStringRef::Of("Format");
  1687. return name;
  1688. }
  1689. static bool DeclareSignature(
  1690. const TStringRef& name,
  1691. TType*,
  1692. IFunctionTypeInfoBuilder& builder,
  1693. bool typesOnly)
  1694. {
  1695. if (Name() != name) {
  1696. return false;
  1697. }
  1698. auto resourceType = builder.Resource(TMResourceName);
  1699. auto stringType = builder.SimpleType<char*>();
  1700. auto boolType = builder.SimpleType<bool>();
  1701. auto optionalBoolType = builder.Optional()->Item(boolType).Build();
  1702. auto args = builder.Args();
  1703. args->Add(stringType);
  1704. args->Add(optionalBoolType).Name("AlwaysWriteFractionalSeconds");
  1705. args->Done();
  1706. builder.OptionalArgs(1);
  1707. builder.Returns(
  1708. builder.Callable(1)
  1709. ->Returns(stringType)
  1710. .Arg(resourceType)
  1711. .Flags(ICallablePayload::TArgumentFlags::AutoMap)
  1712. .Build()
  1713. );
  1714. if (!typesOnly) {
  1715. builder.Implementation(new TFormat(builder.GetSourcePosition()));
  1716. }
  1717. return true;
  1718. }
  1719. private:
  1720. using TPrintersList = std::vector<std::function<size_t(char*, const TUnboxedValuePod&, const IDateBuilder&)>>;
  1721. struct TDataPrinter {
  1722. const std::string_view Data;
  1723. size_t operator()(char* out, const TUnboxedValuePod&, const IDateBuilder&) const {
  1724. std::memcpy(out, Data.data(), Data.size());
  1725. return Data.size();
  1726. }
  1727. };
  1728. TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try {
  1729. bool alwaysWriteFractionalSeconds = false;
  1730. if (auto val = args[1]) {
  1731. alwaysWriteFractionalSeconds = val.Get<bool>();
  1732. }
  1733. return TUnboxedValuePod(new TImpl(Pos_, args[0], alwaysWriteFractionalSeconds));
  1734. } catch (const std::exception& e) {
  1735. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  1736. }
  1737. class TImpl : public TBoxedValue {
  1738. public:
  1739. TUnboxedValue Run(
  1740. const IValueBuilder* valueBuilder,
  1741. const TUnboxedValuePod* args) const override
  1742. {
  1743. try {
  1744. EMPTY_RESULT_ON_EMPTY_ARG(0);
  1745. const auto value = args[0];
  1746. auto& builder = valueBuilder->GetDateBuilder();
  1747. auto result = valueBuilder->NewStringNotFilled(ReservedSize_);
  1748. auto pos = result.AsStringRef().Data();
  1749. ui32 size = 0U;
  1750. for (const auto& printer : Printers_) {
  1751. if (const auto plus = printer(pos, value, builder)) {
  1752. size += plus;
  1753. pos += plus;
  1754. }
  1755. }
  1756. if (size < ReservedSize_) {
  1757. result = valueBuilder->SubString(result.Release(), 0U, size);
  1758. }
  1759. return result;
  1760. } catch (const std::exception& e) {
  1761. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  1762. }
  1763. }
  1764. TImpl(TSourcePosition pos, TUnboxedValue format, bool alwaysWriteFractionalSeconds)
  1765. : Pos_(pos)
  1766. , Format_(format)
  1767. {
  1768. const std::string_view formatView(Format_.AsStringRef());
  1769. auto dataStart = formatView.begin();
  1770. size_t dataSize = 0U;
  1771. for (auto ptr = formatView.begin(); formatView.end() != ptr; ++ptr) {
  1772. if (*ptr != '%') {
  1773. ++dataSize;
  1774. continue;
  1775. }
  1776. if (dataSize) {
  1777. Printers_.emplace_back(TDataPrinter{std::string_view(&*dataStart, dataSize)});
  1778. ReservedSize_ += dataSize;
  1779. dataSize = 0U;
  1780. }
  1781. if (formatView.end() == ++ptr) {
  1782. ythrow yexception() << "format string ends with single %%";
  1783. }
  1784. switch (*ptr) {
  1785. case '%': {
  1786. static constexpr size_t size = 1;
  1787. Printers_.emplace_back([](char* out, const TUnboxedValuePod&, const IDateBuilder&) {
  1788. *out = '%';
  1789. return size;
  1790. });
  1791. ReservedSize_ += size;
  1792. break;
  1793. }
  1794. case 'Y': {
  1795. static constexpr size_t size = 4;
  1796. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1797. return PrintNDigits<size>::Do(GetYear(value), out);
  1798. });
  1799. ReservedSize_ += size;
  1800. break;
  1801. }
  1802. case 'm': {
  1803. static constexpr size_t size = 2;
  1804. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1805. return PrintNDigits<size>::Do(GetMonth(value), out);
  1806. });
  1807. ReservedSize_ += size;
  1808. break;
  1809. }
  1810. case 'd': {
  1811. static constexpr size_t size = 2;
  1812. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1813. return PrintNDigits<size>::Do(GetDay(value), out);
  1814. });
  1815. ReservedSize_ += size;
  1816. break;
  1817. }
  1818. case 'H': {
  1819. static constexpr size_t size = 2;
  1820. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1821. return PrintNDigits<size>::Do(GetHour(value), out);
  1822. });
  1823. ReservedSize_ += size;
  1824. break;
  1825. }
  1826. case 'M': {
  1827. static constexpr size_t size = 2;
  1828. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1829. return PrintNDigits<size>::Do(GetMinute(value), out);
  1830. });
  1831. ReservedSize_ += size;
  1832. break;
  1833. }
  1834. case 'S':
  1835. Printers_.emplace_back([alwaysWriteFractionalSeconds](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1836. constexpr size_t size = 2;
  1837. if (const auto microsecond = GetMicrosecond(value); microsecond || alwaysWriteFractionalSeconds) {
  1838. out += PrintNDigits<size>::Do(GetSecond(value), out);
  1839. *out++ = '.';
  1840. constexpr size_t msize = 6;
  1841. auto addSz = alwaysWriteFractionalSeconds ?
  1842. PrintNDigits<msize, true>::Do(microsecond, out) :
  1843. PrintNDigits<msize, false>::Do(microsecond, out);
  1844. return size + 1U + addSz;
  1845. }
  1846. return PrintNDigits<size>::Do(GetSecond(value), out);
  1847. });
  1848. ReservedSize_ += 9;
  1849. break;
  1850. case 'z': {
  1851. static constexpr size_t size = 5;
  1852. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder& builder) {
  1853. auto timezoneId = GetTimezoneId(value);
  1854. if (TTMStorage::IsUniversal(timezoneId)) {
  1855. std::memcpy(out, "+0000", size);
  1856. return size;
  1857. }
  1858. i32 shift;
  1859. if (!builder.GetTimezoneShift(GetYear(value), GetMonth(value), GetDay(value),
  1860. GetHour(value), GetMinute(value), GetSecond(value), timezoneId, shift))
  1861. {
  1862. std::memcpy(out, "+0000", size);
  1863. return size;
  1864. }
  1865. *out++ = shift > 0 ? '+' : '-';
  1866. shift = std::abs(shift);
  1867. out += PrintNDigits<2U>::Do(shift / 60U, out);
  1868. out += PrintNDigits<2U>::Do(shift % 60U, out);
  1869. return size;
  1870. });
  1871. ReservedSize_ += size;
  1872. break;
  1873. }
  1874. case 'Z':
  1875. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1876. const auto timezoneId = GetTimezoneId(value);
  1877. const auto tzName = NUdf::GetTimezones()[timezoneId];
  1878. std::memcpy(out, tzName.data(), std::min(tzName.size(), MAX_TIMEZONE_NAME_LEN));
  1879. return tzName.size();
  1880. });
  1881. ReservedSize_ += MAX_TIMEZONE_NAME_LEN;
  1882. break;
  1883. case 'b': {
  1884. static constexpr size_t size = 3;
  1885. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1886. static constexpr std::string_view mp[] {
  1887. "Jan",
  1888. "Feb",
  1889. "Mar",
  1890. "Apr",
  1891. "May",
  1892. "Jun",
  1893. "Jul",
  1894. "Aug",
  1895. "Sep",
  1896. "Oct",
  1897. "Nov",
  1898. "Dec"
  1899. };
  1900. auto month = GetMonth(value);
  1901. Y_ENSURE(month > 0 && month <= sizeof(mp) / sizeof(mp[0]), "Invalid month value");
  1902. std::memcpy(out, mp[month - 1].data(), size);
  1903. return size;
  1904. });
  1905. ReservedSize_ += size;
  1906. break;
  1907. }
  1908. case 'B': {
  1909. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1910. static constexpr std::string_view mp[] {
  1911. "January",
  1912. "February",
  1913. "March",
  1914. "April",
  1915. "May",
  1916. "June",
  1917. "July",
  1918. "August",
  1919. "September",
  1920. "October",
  1921. "November",
  1922. "December"
  1923. };
  1924. auto month = GetMonth(value);
  1925. Y_ENSURE(month > 0 && month <= sizeof(mp) / sizeof(mp[0]), "Invalid month value");
  1926. const std::string_view monthFullName = mp[month - 1];
  1927. std::memcpy(out, monthFullName.data(), monthFullName.size());
  1928. return monthFullName.size();
  1929. });
  1930. ReservedSize_ += 9U; // MAX_MONTH_FULL_NAME_LEN
  1931. break;
  1932. }
  1933. default:
  1934. ythrow yexception() << "invalid format character: " << *ptr;
  1935. }
  1936. dataStart = ptr + 1U;
  1937. }
  1938. if (dataSize) {
  1939. Printers_.emplace_back(TDataPrinter{std::string_view(dataStart, dataSize)});
  1940. ReservedSize_ += dataSize;
  1941. }
  1942. }
  1943. private:
  1944. const TSourcePosition Pos_;
  1945. TUnboxedValue Format_;
  1946. TPrintersList Printers_{};
  1947. size_t ReservedSize_ = 0;
  1948. };
  1949. const TSourcePosition Pos_;
  1950. };
  // Compile-time unrolled parser for a fixed-width decimal field.
  //
  // ParseExaclyNDigits<N>::Do(it, out) reads exactly N characters starting at
  // `it`, appending each decimal digit to `out` (out = out * 10 + digit).
  // Returns true when all N characters are digits; `it` then points past the field.
  // Returns false at the first non-digit; `it` is left on that character and
  // `out` keeps the digits accumulated so far.
  // The caller must guarantee that at least N characters are readable at `it`.
  template<size_t Digits>
  struct ParseExaclyNDigits;

  // Recursion terminator: nothing left to parse.
  template<>
  struct ParseExaclyNDigits<0U> {
      template <typename T>
      static constexpr bool Do(std::string_view::const_iterator&, T&) {
          return true;
      }
  };

  template<size_t Digits>
  struct ParseExaclyNDigits {
      template <typename T>
      static constexpr bool Do(std::string_view::const_iterator& it, T& out) {
          const auto d = *it;
          // std::isdigit requires a value representable as unsigned char (or EOF);
          // a plain char holding a non-ASCII byte may be negative, so convert first.
          if (!std::isdigit(static_cast<unsigned char>(d))) {
              return false;
          }
          out *= 10U;
          out += d - '0';
          return ParseExaclyNDigits<Digits - 1U>::Do(++it, out);
      }
  };
  1973. // Parse
  1974. class TParse : public TBoxedValue {
  1975. public:
  1976. class TFactory : public TBoxedValue {
  1977. public:
  1978. explicit TFactory(TSourcePosition pos)
  1979. : Pos_(pos)
  1980. {}
  1981. private:
  1982. TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try {
  1983. return TUnboxedValuePod(new TParse(args[0], Pos_));
  1984. } catch (const std::exception& e) {
  1985. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  1986. }
  1987. const TSourcePosition Pos_;
  1988. };
  1989. static const TStringRef& Name() {
  1990. static auto name = TStringRef::Of("Parse");
  1991. return name;
  1992. }
  1993. static bool DeclareSignature(
  1994. const TStringRef& name,
  1995. TType*,
  1996. IFunctionTypeInfoBuilder& builder,
  1997. bool typesOnly)
  1998. {
  1999. if (Name() != name) {
  2000. return false;
  2001. }
  2002. auto resourceType = builder.Resource(TMResourceName);
  2003. auto optionalResourceType = builder.Optional()->Item(resourceType).Build();
  2004. builder.Args()->Add<char*>().Flags(ICallablePayload::TArgumentFlags::AutoMap)
  2005. .Add(builder.Optional()->Item<ui16>())
  2006. .Done()
  2007. .OptionalArgs(1);
  2008. builder.RunConfig<char*>().Returns(optionalResourceType);
  2009. if (!typesOnly) {
  2010. builder.Implementation(new TParse::TFactory(builder.GetSourcePosition()));
  2011. }
  2012. return true;
  2013. }
  2014. private:
  2015. const TSourcePosition Pos_;
  2016. const TUnboxedValue Format_;
  2017. std::vector<std::function<bool(std::string_view::const_iterator& it, size_t, TUnboxedValuePod&, const IDateBuilder&)>> Scanners_;
  2018. struct TDataScanner {
  2019. const std::string_view Data_;
  2020. bool operator()(std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod&, const IDateBuilder&) const {
  2021. if (limit < Data_.size() || !std::equal(Data_.begin(), Data_.end(), it)) {
  2022. return false;
  2023. }
  2024. std::advance(it, Data_.size());
  2025. return true;
  2026. }
  2027. };
  2028. TUnboxedValue Run(
  2029. const IValueBuilder* valueBuilder,
  2030. const TUnboxedValuePod* args) const override
  2031. {
  2032. try {
  2033. EMPTY_RESULT_ON_EMPTY_ARG(0);
  2034. const std::string_view buffer = args[0].AsStringRef();
  2035. TUnboxedValuePod result(0);
  2036. auto& storage = Reference(result);
  2037. storage.MakeDefault();
  2038. auto& builder = valueBuilder->GetDateBuilder();
  2039. auto it = buffer.begin();
  2040. for (const auto& scanner : Scanners_) {
  2041. if (!scanner(it, std::distance(it, buffer.end()), result, builder)) {
  2042. return TUnboxedValuePod();
  2043. }
  2044. }
  2045. if (buffer.end() != it || !storage.Validate(builder)) {
  2046. return TUnboxedValuePod();
  2047. }
  2048. return result;
  2049. } catch (const std::exception& e) {
  2050. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  2051. }
  2052. }
  2053. TParse(const TUnboxedValuePod& runConfig, TSourcePosition pos)
  2054. : Pos_(pos)
  2055. , Format_(runConfig)
  2056. {
  2057. const std::string_view formatView(Format_.AsStringRef());
  2058. auto dataStart = formatView.begin();
  2059. size_t dataSize = 0U;
  2060. for (auto ptr = formatView.begin(); formatView.end() != ptr; ++ptr) {
  2061. if (*ptr != '%') {
  2062. ++dataSize;
  2063. continue;
  2064. }
  2065. if (dataSize) {
  2066. Scanners_.emplace_back(TDataScanner{std::string_view(&*dataStart, dataSize)});
  2067. dataSize = 0;
  2068. }
  2069. if (++ptr == formatView.end()) {
  2070. ythrow yexception() << "format string ends with single %%";
  2071. }
  2072. switch (*ptr) {
  2073. case '%':
  2074. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod&, const IDateBuilder&) {
  2075. return limit > 0U && *it++ == '%';
  2076. });
  2077. break;
  2078. case 'Y': {
  2079. static constexpr size_t size = 4;
  2080. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  2081. ui32 year = 0U;
  2082. if (limit < size || !ParseExaclyNDigits<size>::Do(it, year) || !ValidateYear(year)) {
  2083. return false;
  2084. }
  2085. SetYear(result, year);
  2086. return true;
  2087. });
  2088. break;
  2089. }
  2090. case 'm': {
  2091. static constexpr size_t size = 2;
  2092. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  2093. ui32 month = 0U;
  2094. if (limit < size || !ParseExaclyNDigits<size>::Do(it, month) || !ValidateMonth(month)) {
  2095. return false;
  2096. }
  2097. SetMonth(result, month);
  2098. return true;
  2099. });
  2100. break;
  2101. }
  2102. case 'd': {
  2103. static constexpr size_t size = 2;
  2104. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  2105. ui32 day = 0U;
  2106. if (limit < size || !ParseExaclyNDigits<size>::Do(it, day) || !ValidateDay(day)) {
  2107. return false;
  2108. }
  2109. SetDay(result, day);
  2110. return true;
  2111. });
  2112. break;
  2113. }
  2114. case 'H': {
  2115. static constexpr size_t size = 2;
  2116. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  2117. ui32 hour = 0U;
  2118. if (limit < size || !ParseExaclyNDigits<size>::Do(it, hour) || !ValidateHour(hour)) {
  2119. return false;
  2120. }
  2121. SetHour(result, hour);
  2122. return true;
  2123. });
  2124. break;
  2125. }
  2126. case 'M': {
  2127. static constexpr size_t size = 2;
  2128. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  2129. ui32 minute = 0U;
  2130. if (limit < size || !ParseExaclyNDigits<size>::Do(it, minute) || !ValidateMinute(minute)) {
  2131. return false;
  2132. }
  2133. SetMinute(result, minute);
  2134. return true;
  2135. });
  2136. break;
  2137. }
  2138. case 'S': {
  2139. static constexpr size_t size = 2;
  2140. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  2141. ui32 second = 0U;
  2142. if (limit < size || !ParseExaclyNDigits<size>::Do(it, second) || !ValidateSecond(second)) {
  2143. return false;
  2144. }
  2145. SetSecond(result, second);
  2146. limit -= size;
  2147. if (!limit || *it != '.') {
  2148. return true;
  2149. }
  2150. ++it;
  2151. --limit;
  2152. ui32 usec = 0U;
  2153. size_t digits = 6U;
  2154. for (; limit; --limit) {
  2155. const auto c = *it;
  2156. if (!digits || !std::isdigit(c)) {
  2157. break;
  2158. }
  2159. usec *= 10U;
  2160. usec += c - '0';
  2161. ++it;
  2162. --digits;
  2163. }
  2164. for (; !digits && limit && std::isdigit(*it); --limit, ++it);
  2165. while (digits--) {
  2166. usec *= 10U;
  2167. }
  2168. SetMicrosecond(result, usec);
  2169. return true;
  2170. });
  2171. break;
  2172. }
  2173. case 'Z':
  2174. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder& builder) {
  2175. const auto start = it;
  2176. while (limit > 0 && (std::isalnum(*it) || *it == '/' || *it == '_' || *it == '-' || *it == '+')) {
  2177. ++it;
  2178. --limit;
  2179. }
  2180. const auto size = std::distance(start, it);
  2181. ui32 timezoneId;
  2182. if (!builder.FindTimezoneId(TStringRef(&*start, size), timezoneId)) {
  2183. return false;
  2184. }
  2185. SetTimezoneId(result, timezoneId);
  2186. return true;
  2187. });
  2188. break;
  2189. case 'b': {
  2190. static constexpr size_t size = 3;
  2191. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  2192. const auto start = it;
  2193. size_t cnt = 0U;
  2194. while (limit > 0 && cnt < size && std::isalpha(*it)) {
  2195. ++it;
  2196. ++cnt;
  2197. --limit;
  2198. }
  2199. const std::string_view monthName{start, cnt};
  2200. ui8 month = 0U;
  2201. if (cnt < size || !ValidateMonthShortName(monthName, month)) {
  2202. return false;
  2203. }
  2204. SetMonth(result, month);
  2205. return true;
  2206. });
  2207. break;
  2208. }
  2209. case 'B': {
  2210. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  2211. const auto start = it;
  2212. size_t cnt = 0U;
  2213. while (limit > 0 && std::isalpha(*it)) {
  2214. ++it;
  2215. ++cnt;
  2216. --limit;
  2217. }
  2218. const std::string_view monthName{start, cnt};
  2219. ui8 month = 0U;
  2220. if (!ValidateMonthFullName(monthName, month)) {
  2221. return false;
  2222. }
  2223. SetMonth(result, month);
  2224. return true;
  2225. });
  2226. break;
  2227. }
  2228. default:
  2229. ythrow yexception() << "invalid format character: " << *ptr;
  2230. }
  2231. dataStart = ptr + 1U;
  2232. }
  2233. if (dataSize) {
  2234. Scanners_.emplace_back(TDataScanner{std::string_view(&*dataStart, dataSize)});
  2235. }
  2236. }
  2237. };
  2238. #define PARSE_SPECIFIC_FORMAT(format) \
  2239. SIMPLE_STRICT_UDF(TParse##format, TOptional<TResource<TMResourceName>>(TAutoMap<char*>)) { \
  2240. auto str = args[0].AsStringRef(); \
  2241. TInstant instant; \
  2242. if (!TInstant::TryParse##format(TStringBuf(str.Data(), str.Size()), instant) || instant.Seconds() >= NUdf::MAX_DATETIME) { \
  2243. return TUnboxedValuePod(); \
  2244. } \
  2245. auto& builder = valueBuilder->GetDateBuilder(); \
  2246. TUnboxedValuePod result(0); \
  2247. auto& storage = Reference(result); \
  2248. storage.FromTimestamp(builder, instant.MicroSeconds()); \
  2249. return result; \
  2250. }
  2251. PARSE_SPECIFIC_FORMAT(Rfc822);
  2252. PARSE_SPECIFIC_FORMAT(Iso8601);
  2253. PARSE_SPECIFIC_FORMAT(Http);
  2254. PARSE_SPECIFIC_FORMAT(X509);
  2255. SIMPLE_MODULE(TDateTime2Module,
  2256. TUserDataTypeFuncFactory<true, true, SplitUDF, TSplit,
  2257. TDate,
  2258. TDatetime,
  2259. TTimestamp,
  2260. TTzDate,
  2261. TTzDatetime,
  2262. TTzTimestamp,
  2263. TDate32,
  2264. TDatetime64,
  2265. TTimestamp64>,
  2266. TMakeDate,
  2267. TMakeDatetime,
  2268. TMakeTimestamp,
  2269. TMakeTzDate,
  2270. TMakeTzDatetime,
  2271. TMakeTzTimestamp,
  2272. TConvert,
  2273. TMakeDate32,
  2274. TMakeDatetime64,
  2275. TMakeTimestamp64,
  2276. TGetDateComponent<GetYearUDF, ui16, GetYear, i32, GetWYear>,
  2277. TGetDateComponent<GetDayOfYearUDF, ui16, GetDayOfYear, ui16, GetWDayOfYear>,
  2278. TGetDateComponent<GetMonthUDF, ui8, GetMonth, ui8, GetWMonth>,
  2279. TGetDateComponentName<GetMonthNameUDF, GetMonthName<TMResourceName>, GetMonthName<TM64ResourceName>>,
  2280. TGetDateComponent<GetWeekOfYearUDF, ui8, GetWeekOfYear, ui8, GetWWeekOfYear>,
  2281. TGetDateComponent<GetWeekOfYearIso8601UDF, ui8, GetWeekOfYearIso8601, ui8, GetWWeekOfYearIso8601>,
  2282. TGetDateComponent<GetDayOfMonthUDF, ui8, GetDay, ui8, GetWDay>,
  2283. TGetDateComponent<GetDayOfWeekUDF, ui8, GetDayOfWeek, ui8, GetWDayOfWeek>,
  2284. TGetDateComponentName<GetDayOfWeekNameUDF, GetDayOfWeekName<TMResourceName>, GetDayOfWeekName<TM64ResourceName>>,
  2285. TGetTimeComponent<GetHourUDF, ui8, GetHour, 1u, 3600u, 24u, false>,
  2286. TGetTimeComponent<GetMinuteUDF, ui8, GetMinute, 1u, 60u, 60u, false>,
  2287. TGetTimeComponent<GetSecondUDF, ui8, GetSecond, 1u, 1u, 60u, false>,
  2288. TGetTimeComponent<GetMillisecondOfSecondUDF, ui32, GetMicrosecond, 1000u, 1000u, 1000u, true>,
  2289. TGetTimeComponent<GetMicrosecondOfSecondUDF, ui32, GetMicrosecond, 1u, 1u, 1000000u, true>,
  2290. TGetDateComponent<GetTimezoneIdUDF, ui16, GetTimezoneId, ui16, GetWTimezoneId>,
  2291. TGetDateComponentName<GetTimezoneNameUDF, GetTimezoneName<TMResourceName>, GetTimezoneName<TM64ResourceName>>,
  2292. TUpdate,
  2293. TFromSeconds,
  2294. TFromMilliseconds,
  2295. TFromMicroseconds,
  2296. TFromSeconds64,
  2297. TFromMilliseconds64,
  2298. TFromMicroseconds64,
  2299. TIntervalFromDays,
  2300. TIntervalFromHours,
  2301. TIntervalFromMinutes,
  2302. TIntervalFromSeconds,
  2303. TIntervalFromMilliseconds,
  2304. TIntervalFromMicroseconds,
  2305. TInterval64FromDays,
  2306. TInterval64FromHours,
  2307. TInterval64FromMinutes,
  2308. TInterval64FromSeconds,
  2309. TInterval64FromMilliseconds,
  2310. TInterval64FromMicroseconds,
  2311. TToDays,
  2312. TToHours,
  2313. TToMinutes,
  2314. TStartOfYear,
  2315. TStartOfQuarter,
  2316. TStartOfMonth,
  2317. TStartOfWeek,
  2318. TStartOfDay,
  2319. TStartOf,
  2320. TTimeOfDay,
  2321. TShiftYears,
  2322. TShiftQuarters,
  2323. TShiftMonths,
  2324. TEndOfYear,
  2325. TEndOfQuarter,
  2326. TEndOfMonth,
  2327. TEndOfWeek,
  2328. TEndOfDay,
  2329. TToUnits<ToSecondsUDF, ui32, 1>,
  2330. TToUnits<ToMillisecondsUDF, ui64, 1000>,
  2331. TToUnits<ToMicrosecondsUDF, ui64, 1000000>,
  2332. TFormat,
  2333. TParse,
  2334. TParseRfc822,
  2335. TParseIso8601,
  2336. TParseHttp,
  2337. TParseX509
  2338. )
  2339. }
  2340. REGISTER_MODULES(TDateTime2Module)