datetime_udf.cpp 101 KB


  1. #include <yql/essentials/minikql/mkql_type_ops.h>
  2. #include <yql/essentials/public/udf/tz/udf_tz.h>
  3. #include <yql/essentials/public/udf/udf_helpers.h>
  4. #include <yql/essentials/minikql/datetime/datetime.h>
  5. #include <yql/essentials/minikql/datetime/datetime64.h>
  6. #include <yql/essentials/public/udf/arrow/udf_arrow_helpers.h>
  7. #include <util/datetime/base.h>
  8. using namespace NKikimr;
  9. using namespace NUdf;
  10. using namespace NYql::DateTime;
// Names of the UDF entry points and the resource tags used in this file.
// They are defined as `extern` arrays (external linkage) because they are used
// as `const char*` non-type template arguments below (e.g. TFuncName in
// TToUnits / TGetTimeComponent and TResource<TMResourceName>).
extern const char SplitName[] = "Split";
extern const char ToSecondsName[] = "ToSeconds";
extern const char ToMillisecondsName[] = "ToMilliseconds";
extern const char ToMicrosecondsName[] = "ToMicroseconds";
extern const char GetYearName[] = "GetYear";
extern const char GetDayOfYearName[] = "GetDayOfYear";
extern const char GetMonthName[] = "GetMonth";
extern const char GetWeekOfYearName[] = "GetWeekOfYear";
extern const char GetWeekOfYearIso8601Name[] = "GetWeekOfYearIso8601";
extern const char GetDayOfMonthName[] = "GetDayOfMonth";
extern const char GetDayOfWeekName[] = "GetDayOfWeek";
extern const char GetTimezoneIdName[] = "GetTimezoneId";
extern const char GetHourName[] = "GetHour";
extern const char GetMinuteName[] = "GetMinute";
extern const char GetSecondName[] = "GetSecond";
extern const char GetMillisecondOfSecondName[] = "GetMillisecondOfSecond";
extern const char GetMicrosecondOfSecondName[] = "GetMicrosecondOfSecond";
// Resource tags for the split-date storages (narrow and 64-bit variants).
extern const char TMResourceName[] = "DateTime2.TM";
extern const char TM64ResourceName[] = "DateTime2.TM64";
  30. const auto UsecondsInDay = 86400000000ll;
  31. const auto UsecondsInHour = 3600000000ll;
  32. const auto UsecondsInMinute = 60000000ll;
  33. const auto UsecondsInSecond = 1000000ll;
  34. const auto UsecondsInMilliseconds = 1000ll;
  35. template <const char* TFuncName, typename TResult, ui32 ScaleAfterSeconds>
  36. class TToUnits {
  37. public:
  38. typedef bool TTypeAwareMarker;
  39. using TSignedResult = typename std::make_signed<TResult>::type;
  40. static TResult DateCore(ui16 value) {
  41. return value * ui32(86400) * TResult(ScaleAfterSeconds);
  42. }
  43. template<typename TTzDate>
  44. static TResult TzBlockCore(TBlockItem tzDate);
  45. template<>
  46. static TResult TzBlockCore<TTzDate>(TBlockItem tzDate) {
  47. return DateCore(tzDate.Get<ui16>());
  48. }
  49. template<>
  50. static TResult TzBlockCore<TTzDatetime>(TBlockItem tzDate) {
  51. return DatetimeCore(tzDate.Get<ui32>());
  52. }
  53. template<>
  54. static TResult TzBlockCore<TTzTimestamp>(TBlockItem tzDate) {
  55. return TimestampCore(tzDate.Get<ui64>());
  56. }
  57. static TResult DatetimeCore(ui32 value) {
  58. return value * TResult(ScaleAfterSeconds);
  59. }
  60. static TResult TimestampCore(ui64 value) {
  61. return TResult(value / (1000000u / ScaleAfterSeconds));
  62. }
  63. static TSignedResult IntervalCore(i64 value) {
  64. return TSignedResult(value / (1000000u / ScaleAfterSeconds));
  65. }
  66. static const TStringRef& Name() {
  67. static auto name = TStringRef(TFuncName, std::strlen(TFuncName));
  68. return name;
  69. }
  70. template<typename TTzDate, typename TOutput>
  71. static auto MakeTzBlockExec() {
  72. using TReader = TTzDateBlockReader<TTzDate, /*Nullable*/ false>;
  73. return UnaryPreallocatedReaderExecImpl<TReader, TOutput, TzBlockCore<TTzDate>>;
  74. }
  75. static bool DeclareSignature(
  76. const TStringRef& name,
  77. TType* userType,
  78. IFunctionTypeInfoBuilder& builder,
  79. bool typesOnly)
  80. {
  81. if (Name() != name) {
  82. return false;
  83. }
  84. try {
  85. auto typeInfoHelper = builder.TypeInfoHelper();
  86. TTupleTypeInspector tuple(*typeInfoHelper, userType);
  87. Y_ENSURE(tuple);
  88. Y_ENSURE(tuple.GetElementsCount() > 0);
  89. TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0));
  90. Y_ENSURE(argsTuple);
  91. if (argsTuple.GetElementsCount() != 1) {
  92. builder.SetError("Expected one argument");
  93. return true;
  94. }
  95. auto argType = argsTuple.GetElementType(0);
  96. TVector<const TType*> argBlockTypes;
  97. argBlockTypes.push_back(argType);
  98. TBlockTypeInspector block(*typeInfoHelper, argType);
  99. if (block) {
  100. Y_ENSURE(!block.IsScalar());
  101. argType = block.GetItemType();
  102. }
  103. bool isOptional = false;
  104. if (auto opt = TOptionalTypeInspector(*typeInfoHelper, argType)) {
  105. argType = opt.GetItemType();
  106. isOptional = true;
  107. }
  108. TDataTypeInspector data(*typeInfoHelper, argType);
  109. if (!data) {
  110. builder.SetError("Expected data type");
  111. return true;
  112. }
  113. auto typeId = data.GetTypeId();
  114. if (!(typeId == TDataType<TDate>::Id || typeId == TDataType<TTzDate>::Id ||
  115. typeId == TDataType<TDatetime>::Id || typeId == TDataType<TTzDatetime>::Id ||
  116. typeId == TDataType<TTimestamp>::Id || typeId == TDataType<TTzTimestamp>::Id ||
  117. typeId == TDataType<TInterval>::Id)) {
  118. builder.SetError(TStringBuilder() << "Type " << GetDataTypeInfo(GetDataSlot(typeId)).Name << " is not supported");
  119. }
  120. builder.Args()->Add(argsTuple.GetElementType(0)).Done();
  121. const TType* retType;
  122. if (typeId != TDataType<TInterval>::Id) {
  123. retType = builder.SimpleType<TResult>();
  124. } else {
  125. retType = builder.SimpleType<TSignedResult>();
  126. }
  127. if (isOptional) {
  128. retType = builder.Optional()->Item(retType).Build();
  129. }
  130. auto outputType = retType;
  131. if (block) {
  132. retType = builder.Block(block.IsScalar())->Item(retType).Build();
  133. }
  134. builder.Returns(retType);
  135. builder.SupportsBlocks();
  136. builder.IsStrict();
  137. builder.UserType(userType);
  138. if (!typesOnly) {
  139. if (typeId == TDataType<TDate>::Id || typeId == TDataType<TTzDate>::Id) {
  140. if (block) {
  141. const auto exec = (typeId == TDataType<TTzDate>::Id)
  142. ? MakeTzBlockExec<TTzDate, TResult>()
  143. : UnaryPreallocatedExecImpl<ui16, TResult, DateCore>;
  144. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  145. exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  146. } else {
  147. builder.Implementation(new TUnaryOverOptionalImpl<ui16, TResult, DateCore>());
  148. }
  149. }
  150. if (typeId == TDataType<TDatetime>::Id || typeId == TDataType<TTzDatetime>::Id) {
  151. if (block) {
  152. const auto exec = (typeId == TDataType<TTzDatetime>::Id)
  153. ? MakeTzBlockExec<TTzDatetime, TResult>()
  154. : UnaryPreallocatedExecImpl<ui32, TResult, DatetimeCore>;
  155. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  156. exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  157. } else {
  158. builder.Implementation(new TUnaryOverOptionalImpl<ui32, TResult, DatetimeCore>());
  159. }
  160. }
  161. if (typeId == TDataType<TTimestamp>::Id || typeId == TDataType<TTzTimestamp>::Id) {
  162. if (block) {
  163. const auto exec = (typeId == TDataType<TTzTimestamp>::Id)
  164. ? MakeTzBlockExec<TTzTimestamp, TResult>()
  165. : UnaryPreallocatedExecImpl<ui64, TResult, TimestampCore>;
  166. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  167. exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  168. } else {
  169. builder.Implementation(new TUnaryOverOptionalImpl<ui64, TResult, TimestampCore>());
  170. }
  171. }
  172. if (typeId == TDataType<TInterval>::Id) {
  173. if (block) {
  174. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  175. UnaryPreallocatedExecImpl<i64, TSignedResult, IntervalCore>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  176. } else {
  177. builder.Implementation(new TUnaryOverOptionalImpl<i64, TSignedResult, IntervalCore>());
  178. }
  179. }
  180. }
  181. } catch (const std::exception& e) {
  182. builder.SetError(TStringBuf(e.what()));
  183. }
  184. return true;
  185. }
  186. };
  187. template <const char* TFuncName, typename TFieldStorage, TFieldStorage (*FieldFunc)(const TUnboxedValuePod&), ui32 Divisor, ui32 Scale, ui32 Limit, bool Fractional>
  188. struct TGetTimeComponent {
  189. typedef bool TTypeAwareMarker;
  190. template <typename TInput, bool AlwaysZero, bool InputFractional>
  191. static TFieldStorage Core(TInput val) {
  192. if constexpr (AlwaysZero) {
  193. return 0;
  194. }
  195. if constexpr (InputFractional) {
  196. if constexpr (Fractional) {
  197. return (val / Scale) % Limit;
  198. } else {
  199. return (val / 1000000u / Scale) % Limit;
  200. }
  201. } else {
  202. if constexpr (Fractional) {
  203. return 0;
  204. } else {
  205. return (val / Scale) % Limit;
  206. }
  207. }
  208. }
  209. class TImpl : public TBoxedValue {
  210. public:
  211. TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final {
  212. Y_UNUSED(valueBuilder);
  213. if (!args[0]) {
  214. return {};
  215. }
  216. return TUnboxedValuePod(TFieldStorage((FieldFunc(args[0])) / Divisor));
  217. }
  218. };
  219. static const TStringRef& Name() {
  220. static auto name = TStringRef(TFuncName, std::strlen(TFuncName));
  221. return name;
  222. }
  223. static bool DeclareSignature(
  224. const TStringRef& name,
  225. TType* userType,
  226. IFunctionTypeInfoBuilder& builder,
  227. bool typesOnly)
  228. {
  229. if (Name() != name) {
  230. return false;
  231. }
  232. try {
  233. auto typeInfoHelper = builder.TypeInfoHelper();
  234. TTupleTypeInspector tuple(*typeInfoHelper, userType);
  235. if (tuple) {
  236. Y_ENSURE(tuple.GetElementsCount() > 0);
  237. TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0));
  238. Y_ENSURE(argsTuple);
  239. if (argsTuple.GetElementsCount() != 1) {
  240. builder.SetError("Expected one argument");
  241. return true;
  242. }
  243. auto argType = argsTuple.GetElementType(0);
  244. TVector<const TType*> argBlockTypes;
  245. argBlockTypes.push_back(argType);
  246. TBlockTypeInspector block(*typeInfoHelper, argType);
  247. if (block) {
  248. Y_ENSURE(!block.IsScalar());
  249. argType = block.GetItemType();
  250. }
  251. bool isOptional = false;
  252. if (auto opt = TOptionalTypeInspector(*typeInfoHelper, argType)) {
  253. argType = opt.GetItemType();
  254. isOptional = true;
  255. }
  256. TResourceTypeInspector res(*typeInfoHelper, argType);
  257. if (!res) {
  258. TDataTypeInspector data(*typeInfoHelper, argType);
  259. if (!data) {
  260. builder.SetError("Expected data type");
  261. return true;
  262. }
  263. auto typeId = data.GetTypeId();
  264. if (typeId == TDataType<TDate>::Id ||
  265. typeId == TDataType<TDatetime>::Id ||
  266. typeId == TDataType<TTimestamp>::Id) {
  267. builder.Args()->Add(argsTuple.GetElementType(0)).Done();
  268. const TType* retType = builder.SimpleType<TFieldStorage>();
  269. if (isOptional) {
  270. retType = builder.Optional()->Item(retType).Build();
  271. }
  272. auto outputType = retType;
  273. if (block) {
  274. retType = builder.Block(block.IsScalar())->Item(retType).Build();
  275. }
  276. builder.Returns(retType);
  277. builder.SupportsBlocks();
  278. builder.IsStrict();
  279. builder.UserType(userType);
  280. if (!typesOnly) {
  281. if (typeId == TDataType<TDate>::Id) {
  282. if (block) {
  283. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  284. UnaryPreallocatedExecImpl<ui16, TFieldStorage, Core<ui16, true, false>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  285. } else {
  286. builder.Implementation(new TUnaryOverOptionalImpl<ui16, TFieldStorage, Core<ui16, true, false>>());
  287. }
  288. }
  289. if (typeId == TDataType<TDatetime>::Id) {
  290. if (block) {
  291. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  292. UnaryPreallocatedExecImpl<ui32, TFieldStorage, Core<ui32, false, false>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  293. } else {
  294. builder.Implementation(new TUnaryOverOptionalImpl<ui32, TFieldStorage, Core<ui32, false, false>>());
  295. }
  296. }
  297. if (typeId == TDataType<TTimestamp>::Id) {
  298. if (block) {
  299. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  300. UnaryPreallocatedExecImpl<ui64, TFieldStorage, Core<ui64, false, true>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  301. } else {
  302. builder.Implementation(new TUnaryOverOptionalImpl<ui64, TFieldStorage, Core<ui64, false, true>>());
  303. }
  304. }
  305. }
  306. return true;
  307. }
  308. } else {
  309. Y_ENSURE(!block);
  310. if (res.GetTag() != TStringRef::Of(TMResourceName)) {
  311. builder.SetError("Unexpected resource tag");
  312. return true;
  313. }
  314. }
  315. }
  316. // default implementation
  317. builder.Args()->Add<TResource<TMResourceName>>().Flags(ICallablePayload::TArgumentFlags::AutoMap).Done();
  318. builder.Returns<TFieldStorage>();
  319. builder.IsStrict();
  320. if (!typesOnly) {
  321. builder.Implementation(new TImpl());
  322. }
  323. } catch (const std::exception& e) {
  324. builder.SetError(TStringBuf(e.what()));
  325. }
  326. return true;
  327. }
  328. };
  329. namespace {
  330. const TTMStorage& Reference(const NUdf::TUnboxedValuePod& value) {
  331. return *reinterpret_cast<const TTMStorage*>(value.GetRawPtr());
  332. }
  333. TTMStorage& Reference(NUdf::TUnboxedValuePod& value) {
  334. return *reinterpret_cast<TTMStorage*>(value.GetRawPtr());
  335. }
  336. const TTMStorage& Reference(const TBlockItem& value) {
  337. return *reinterpret_cast<const TTMStorage*>(value.GetRawPtr());
  338. }
  339. Y_DECLARE_UNUSED TTMStorage& Reference(TBlockItem& value) {
  340. return *reinterpret_cast<TTMStorage*>(value.GetRawPtr());
  341. }
  342. const TTM64Storage& Reference64(const NUdf::TUnboxedValuePod& value) {
  343. return *reinterpret_cast<const TTM64Storage*>(value.GetRawPtr());
  344. }
  345. TTM64Storage& Reference64(NUdf::TUnboxedValuePod& value) {
  346. return *reinterpret_cast<TTM64Storage*>(value.GetRawPtr());
  347. }
  348. template<typename TValue>
  349. TValue DoAddMonths(const TValue& date, i64 months, const NUdf::IDateBuilder& builder) {
  350. auto result = date;
  351. auto& storage = Reference(result);
  352. if (!NYql::DateTime::DoAddMonths(storage, months, builder)) {
  353. return TValue{};
  354. }
  355. return result;
  356. }
  357. template<typename TValue>
  358. TValue DoAddQuarters(const TValue& date, i64 quarters, const NUdf::IDateBuilder& builder) {
  359. return DoAddMonths(date, quarters * 3ll, builder);
  360. }
  361. template<typename TValue>
  362. TValue DoAddYears(const TValue& date, i64 years, const NUdf::IDateBuilder& builder) {
  363. auto result = date;
  364. auto& storage = Reference(result);
  365. if (!NYql::DateTime::DoAddYears(storage, years, builder)) {
  366. return TValue{};
  367. }
  368. return result;
  369. }
  370. #define ACCESSORS_POLY(field, type, wtype) \
  371. template<typename TValue> \
  372. inline type Get##field(const TValue& tm) { \
  373. return (type)Reference(tm).field; \
  374. } \
  375. template<typename TValue> \
  376. inline wtype GetW##field(const TValue& tm) { \
  377. return (wtype)Reference64(tm).field; \
  378. } \
  379. template<typename TValue> \
  380. inline void Set##field(TValue& tm, type value) { \
  381. Reference(tm).field = value; \
  382. } \
  383. #define ACCESSORS(field, type) \
  384. ACCESSORS_POLY(field, type, type)
  385. ACCESSORS_POLY(Year, ui16, i32)
  386. ACCESSORS(DayOfYear, ui16)
  387. ACCESSORS(WeekOfYear, ui8)
  388. ACCESSORS(WeekOfYearIso8601, ui8)
  389. ACCESSORS(DayOfWeek, ui8)
  390. ACCESSORS(Month, ui8)
  391. ACCESSORS(Day, ui8)
  392. ACCESSORS(Hour, ui8)
  393. ACCESSORS(Minute, ui8)
  394. ACCESSORS(Second, ui8)
  395. ACCESSORS(Microsecond, ui32)
  396. ACCESSORS(TimezoneId, ui16)
  397. #undef ACCESSORS
  398. #undef ACCESSORS_POLY
  399. inline bool ValidateYear(ui16 year) {
  400. return year >= NUdf::MIN_YEAR - 1 || year <= NUdf::MAX_YEAR + 1;
  401. }
  402. inline bool ValidateMonth(ui8 month) {
  403. return month >= 1 && month <= 12;
  404. }
  405. inline bool ValidateDay(ui8 day) {
  406. return day >= 1 && day <= 31;
  407. }
  408. inline bool ValidateHour(ui8 hour) {
  409. return hour < 24;
  410. }
  411. inline bool ValidateMinute(ui8 minute) {
  412. return minute < 60;
  413. }
  414. inline bool ValidateSecond(ui8 second) {
  415. return second < 60;
  416. }
  417. inline bool ValidateMicrosecond(ui32 microsecond) {
  418. return microsecond < 1000000;
  419. }
  420. inline bool ValidateTimezoneId(ui16 timezoneId) {
  421. const auto& zones = NUdf::GetTimezones();
  422. return timezoneId < zones.size() && !zones[timezoneId].empty();
  423. }
  424. inline bool ValidateMonthShortName(const std::string_view& monthName, ui8& month) {
  425. static constexpr auto cmp = [](const std::string_view& a, const std::string_view& b) {
  426. int cmp = strnicmp(a.data(), b.data(), std::min(a.size(), b.size()));
  427. if (cmp == 0)
  428. return a.size() < b.size();
  429. return cmp < 0;
  430. };
  431. static const std::map<std::string_view, ui8, decltype(cmp)> mp = {
  432. {"jan", 1},
  433. {"feb", 2},
  434. {"mar", 3},
  435. {"apr", 4},
  436. {"may", 5},
  437. {"jun", 6},
  438. {"jul", 7},
  439. {"aug", 8},
  440. {"sep", 9},
  441. {"oct", 10},
  442. {"nov", 11},
  443. {"dec", 12}
  444. };
  445. const auto& it = mp.find(monthName);
  446. if (it != mp.end()) {
  447. month = it -> second;
  448. return true;
  449. }
  450. return false;
  451. }
  452. inline bool ValidateMonthFullName(const std::string_view& monthName, ui8& month) {
  453. static constexpr auto cmp = [](const std::string_view& a, const std::string_view& b) {
  454. int cmp = strnicmp(a.data(), b.data(), std::min(a.size(), b.size()));
  455. if (cmp == 0)
  456. return a.size() < b.size();
  457. return cmp < 0;
  458. };
  459. static const std::map<std::string_view, ui8, decltype(cmp)> mp = {
  460. {"january", 1},
  461. {"february", 2},
  462. {"march", 3},
  463. {"april", 4},
  464. {"may", 5},
  465. {"june", 6},
  466. {"july", 7},
  467. {"august", 8},
  468. {"september", 9},
  469. {"october", 10},
  470. {"november", 11},
  471. {"december", 12}
  472. };
  473. const auto& it = mp.find(monthName);
  474. if (it != mp.end()) {
  475. month = it -> second;
  476. return true;
  477. }
  478. return false;
  479. }
  480. template<typename TType>
  481. inline bool Validate(typename TDataType<TType>::TLayout arg);
  482. template<>
  483. inline bool Validate<TTimestamp>(ui64 timestamp) {
  484. return timestamp < MAX_TIMESTAMP;
  485. }
  486. template<>
  487. inline bool Validate<TTimestamp64>(i64 timestamp) {
  488. return timestamp >= MIN_TIMESTAMP64 && timestamp <= MAX_TIMESTAMP64;
  489. }
  490. template<>
  491. inline bool Validate<TInterval>(i64 interval) {
  492. return interval > -i64(MAX_TIMESTAMP) && interval < i64(MAX_TIMESTAMP);
  493. }
  494. template<>
  495. inline bool Validate<TInterval64>(i64 interval) {
  496. return interval >= -MAX_INTERVAL64 && interval <= MAX_INTERVAL64;
  497. }
  498. // Split
  499. template<typename TUserDataType, bool Nullable>
  500. using TSplitArgReader = std::conditional_t<TTzDataType<TUserDataType>::Result,
  501. TTzDateBlockReader<TUserDataType, Nullable>,
  502. TFixedSizeBlockReader<typename TDataType<TUserDataType>::TLayout, Nullable>>;
  503. template<typename TUserDataType>
  504. struct TSplitKernelExec : TUnaryKernelExec<TSplitKernelExec<TUserDataType>, TSplitArgReader<TUserDataType, false>, TResourceArrayBuilder<false>> {
  505. static void Split(TBlockItem arg, TTMStorage& storage, const IValueBuilder& valueBuilder);
  506. template<typename TSink>
  507. static void Process(const IValueBuilder* valueBuilder, TBlockItem arg, const TSink& sink) {
  508. try {
  509. TBlockItem res {0};
  510. Split(arg, Reference(res), *valueBuilder);
  511. sink(res);
  512. } catch (const std::exception& e) {
  513. UdfTerminate((TStringBuilder() << e.what()).data());
  514. }
  515. }
  516. };
  517. template <typename TUserDataType>
  518. class TSplit : public TBoxedValue {
  519. const TSourcePosition Pos_;
  520. public:
  521. explicit TSplit(TSourcePosition pos)
  522. : Pos_(pos)
  523. {}
  524. TUnboxedValue Run(
  525. const IValueBuilder* valueBuilder,
  526. const TUnboxedValuePod* args) const override;
  527. static bool DeclareSignature(
  528. TStringRef name,
  529. TType* userType,
  530. IFunctionTypeInfoBuilder& builder,
  531. bool typesOnly)
  532. {
  533. const auto typeInfoHelper = builder.TypeInfoHelper();
  534. TTupleTypeInspector tuple(*typeInfoHelper, userType);
  535. Y_ENSURE(tuple);
  536. Y_ENSURE(tuple.GetElementsCount() > 0);
  537. TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0));
  538. Y_ENSURE(argsTuple);
  539. if (argsTuple.GetElementsCount() != 1) {
  540. builder.SetError("Expected one argument");
  541. return true;
  542. }
  543. auto argType = argsTuple.GetElementType(0);
  544. builder.UserType(userType);
  545. builder.SupportsBlocks();
  546. builder.IsStrict();
  547. TBlockTypeInspector block(*typeInfoHelper, argType);
  548. if (block) {
  549. const auto* blockArgType = builder.Block(false)->Item<TUserDataType>().Build();
  550. builder.Args()->Add(blockArgType).Flags(ICallablePayload::TArgumentFlags::AutoMap);
  551. const auto* retType = builder.Resource(TMResourceName);
  552. const auto* blockRetType = builder.Block(false)->Item(retType).Build();
  553. builder.Returns(blockRetType);
  554. if (!typesOnly) {
  555. builder.Implementation(new TSimpleArrowUdfImpl({blockArgType}, retType, block.IsScalar(),
  556. TSplitKernelExec<TUserDataType>::Do, builder, TString(name), arrow::compute::NullHandling::COMPUTED_NO_PREALLOCATE));
  557. }
  558. } else {
  559. builder.Args()->Add<TUserDataType>().Flags(ICallablePayload::TArgumentFlags::AutoMap);
  560. if constexpr (NUdf::TDataType<TUserDataType>::Features & NYql::NUdf::BigDateType) {
  561. builder.Returns(builder.Resource(TM64ResourceName));
  562. } else {
  563. builder.Returns(builder.Resource(TMResourceName));
  564. }
  565. if (!typesOnly) {
  566. builder.Implementation(new TSplit<TUserDataType>(builder.GetSourcePosition()));
  567. }
  568. }
  569. return true;
  570. }
  571. };
  572. template <>
  573. void TSplitKernelExec<TDate>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
  574. storage.FromDate(builder.GetDateBuilder(), arg.Get<ui16>());
  575. }
  576. template <>
  577. void TSplitKernelExec<TDatetime>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
  578. storage.FromDatetime(builder.GetDateBuilder(), arg.Get<ui32>());
  579. }
  580. template <>
  581. void TSplitKernelExec<TTimestamp>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
  582. storage.FromTimestamp(builder.GetDateBuilder(), arg.Get<ui64>());
  583. }
  584. template <>
  585. void TSplitKernelExec<TTzDate>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
  586. storage.FromDate(builder.GetDateBuilder(), arg.Get<ui16>(), arg.GetTimezoneId());
  587. }
  588. template <>
  589. void TSplitKernelExec<TTzDatetime>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
  590. storage.FromDatetime(builder.GetDateBuilder(), arg.Get<ui32>(), arg.GetTimezoneId());
  591. }
  592. template <>
  593. void TSplitKernelExec<TTzTimestamp>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
  594. storage.FromTimestamp(builder.GetDateBuilder(), arg.Get<ui64>(), arg.GetTimezoneId());
  595. }
  596. template <>
  597. void TSplitKernelExec<TDate32>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) {
  598. ythrow yexception() << "Not implemented";
  599. }
  600. template <>
  601. void TSplitKernelExec<TDatetime64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) {
  602. ythrow yexception() << "Not implemented";
  603. }
  604. template <>
  605. void TSplitKernelExec<TTimestamp64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) {
  606. ythrow yexception() << "Not implemented";
  607. }
  608. template <>
  609. TUnboxedValue TSplit<TDate>::Run(
  610. const IValueBuilder* valueBuilder,
  611. const TUnboxedValuePod* args) const
  612. {
  613. try {
  614. EMPTY_RESULT_ON_EMPTY_ARG(0);
  615. auto& builder = valueBuilder->GetDateBuilder();
  616. TUnboxedValuePod result(0);
  617. auto& storage = Reference(result);
  618. storage.FromDate(builder, args[0].Get<ui16>());
  619. return result;
  620. } catch (const std::exception& e) {
  621. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  622. }
  623. }
  624. template <>
  625. TUnboxedValue TSplit<TDate32>::Run(
  626. const IValueBuilder* valueBuilder,
  627. const TUnboxedValuePod* args) const
  628. {
  629. try {
  630. EMPTY_RESULT_ON_EMPTY_ARG(0);
  631. TUnboxedValuePod result(0);
  632. auto& storage = Reference64(result);
  633. storage.FromDate32(valueBuilder->GetDateBuilder(), args[0].Get<i32>());
  634. return result;
  635. } catch (const std::exception& e) {
  636. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  637. }
  638. }
  639. template <>
  640. TUnboxedValue TSplit<TDatetime>::Run(
  641. const IValueBuilder* valueBuilder,
  642. const TUnboxedValuePod* args) const
  643. {
  644. try {
  645. EMPTY_RESULT_ON_EMPTY_ARG(0);
  646. auto& builder = valueBuilder->GetDateBuilder();
  647. TUnboxedValuePod result(0);
  648. auto& storage = Reference(result);
  649. storage.FromDatetime(builder, args[0].Get<ui32>());
  650. return result;
  651. } catch (const std::exception& e) {
  652. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  653. }
  654. }
  655. template <>
  656. TUnboxedValue TSplit<TDatetime64>::Run(
  657. const IValueBuilder* valueBuilder,
  658. const TUnboxedValuePod* args) const
  659. {
  660. try {
  661. EMPTY_RESULT_ON_EMPTY_ARG(0);
  662. TUnboxedValuePod result(0);
  663. auto& storage = Reference64(result);
  664. storage.FromDatetime64(valueBuilder->GetDateBuilder(), args[0].Get<i64>());
  665. return result;
  666. } catch (const std::exception& e) {
  667. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  668. }
  669. }
  670. template <>
  671. TUnboxedValue TSplit<TTimestamp>::Run(
  672. const IValueBuilder* valueBuilder,
  673. const TUnboxedValuePod* args) const
  674. {
  675. try {
  676. EMPTY_RESULT_ON_EMPTY_ARG(0);
  677. auto& builder = valueBuilder->GetDateBuilder();
  678. TUnboxedValuePod result(0);
  679. auto& storage = Reference(result);
  680. storage.FromTimestamp(builder, args[0].Get<ui64>());
  681. return result;
  682. } catch (const std::exception& e) {
  683. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  684. }
  685. }
  686. template <>
  687. TUnboxedValue TSplit<TTimestamp64>::Run(
  688. const IValueBuilder* valueBuilder,
  689. const TUnboxedValuePod* args) const
  690. {
  691. try {
  692. EMPTY_RESULT_ON_EMPTY_ARG(0);
  693. TUnboxedValuePod result(0);
  694. auto& storage = Reference64(result);
  695. storage.FromTimestamp64(valueBuilder->GetDateBuilder(), args[0].Get<i64>());
  696. return result;
  697. } catch (const std::exception& e) {
  698. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  699. }
  700. }
  701. template <>
  702. TUnboxedValue TSplit<TTzDate>::Run(
  703. const IValueBuilder* valueBuilder,
  704. const TUnboxedValuePod* args) const
  705. {
  706. try {
  707. EMPTY_RESULT_ON_EMPTY_ARG(0);
  708. auto& builder = valueBuilder->GetDateBuilder();
  709. TUnboxedValuePod result(0);
  710. auto& storage = Reference(result);
  711. storage.FromDate(builder, args[0].Get<ui16>(), args[0].GetTimezoneId());
  712. return result;
  713. } catch (const std::exception& e) {
  714. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  715. }
  716. }
  717. template <>
  718. TUnboxedValue TSplit<TTzDatetime>::Run(
  719. const IValueBuilder* valueBuilder,
  720. const TUnboxedValuePod* args) const
  721. {
  722. try {
  723. EMPTY_RESULT_ON_EMPTY_ARG(0);
  724. auto& builder = valueBuilder->GetDateBuilder();
  725. TUnboxedValuePod result(0);
  726. auto& storage = Reference(result);
  727. storage.FromDatetime(builder, args[0].Get<ui32>(), args[0].GetTimezoneId());
  728. return result;
  729. } catch (const std::exception& e) {
  730. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  731. }
  732. }
  733. template <>
  734. TUnboxedValue TSplit<TTzTimestamp>::Run(
  735. const IValueBuilder* valueBuilder,
  736. const TUnboxedValuePod* args) const
  737. {
  738. try {
  739. EMPTY_RESULT_ON_EMPTY_ARG(0);
  740. auto& builder = valueBuilder->GetDateBuilder();
  741. TUnboxedValuePod result(0);
  742. auto& storage = Reference(result);
  743. storage.FromTimestamp(builder, args[0].Get<ui64>(), args[0].GetTimezoneId());
  744. return result;
  745. } catch (const std::exception& e) {
  746. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  747. }
  748. }
  749. // Make*
  750. template<typename TUserDataType, bool Nullable>
  751. using TMakeResBuilder = std::conditional_t<TTzDataType<TUserDataType>::Result,
  752. TTzDateArrayBuilder<TUserDataType, Nullable>,
  753. TFixedSizeArrayBuilder<typename TDataType<TUserDataType>::TLayout, Nullable>>;
  754. template<typename TUserDataType>
  755. struct TMakeDateKernelExec : TUnaryKernelExec<TMakeDateKernelExec<TUserDataType>, TReaderTraits::TResource<false>, TMakeResBuilder<TUserDataType, false>> {
  756. static TBlockItem Make(TTMStorage& storage, const IValueBuilder& valueBuilder);
  757. template<typename TSink>
  758. static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
  759. auto& storage = Reference(item);
  760. sink(TBlockItem(Make(storage, *valueBuilder)));
  761. }
  762. };
  763. template<> TBlockItem TMakeDateKernelExec<TDate>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
  764. TBlockItem res(storage.ToDate(valueBuilder.GetDateBuilder(), /*local*/ false));
  765. return res;
  766. }
  767. template<> TBlockItem TMakeDateKernelExec<TDatetime>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
  768. TBlockItem res(storage.ToDatetime(valueBuilder.GetDateBuilder()));
  769. return res;
  770. }
  771. template<> TBlockItem TMakeDateKernelExec<TTimestamp>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
  772. TBlockItem res(storage.ToTimestamp(valueBuilder.GetDateBuilder()));
  773. return res;
  774. }
  775. template<> TBlockItem TMakeDateKernelExec<TTzDate>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
  776. TBlockItem res(storage.ToDate(valueBuilder.GetDateBuilder(), /*local*/ true));
  777. res.SetTimezoneId(storage.TimezoneId);
  778. return res;
  779. }
  780. template<> TBlockItem TMakeDateKernelExec<TTzDatetime>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
  781. TBlockItem res(storage.ToDatetime(valueBuilder.GetDateBuilder()));
  782. res.SetTimezoneId(storage.TimezoneId);
  783. return res;
  784. }
  785. template<> TBlockItem TMakeDateKernelExec<TTzTimestamp>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
  786. TBlockItem res(storage.ToTimestamp(valueBuilder.GetDateBuilder()));
  787. res.SetTimezoneId(storage.TimezoneId);
  788. return res;
  789. }
  790. BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeDate, TDate(TAutoMap<TResource<TMResourceName>>)) {
  791. auto& builder = valueBuilder->GetDateBuilder();
  792. auto& storage = Reference(args[0]);
  793. return TUnboxedValuePod(storage.ToDate(builder, false));
  794. }
  795. END_SIMPLE_ARROW_UDF(TMakeDate, TMakeDateKernelExec<TDate>::Do);
  796. BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeDatetime, TDatetime(TAutoMap<TResource<TMResourceName>>)) {
  797. auto& builder = valueBuilder->GetDateBuilder();
  798. auto& storage = Reference(args[0]);
  799. return TUnboxedValuePod(storage.ToDatetime(builder));
  800. }
  801. END_SIMPLE_ARROW_UDF(TMakeDatetime, TMakeDateKernelExec<TDatetime>::Do);
  802. BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTimestamp, TTimestamp(TAutoMap<TResource<TMResourceName>>)) {
  803. auto& builder = valueBuilder->GetDateBuilder();
  804. auto& storage = Reference(args[0]);
  805. return TUnboxedValuePod(storage.ToTimestamp(builder));
  806. }
  807. END_SIMPLE_ARROW_UDF(TMakeTimestamp, TMakeDateKernelExec<TTimestamp>::Do);
  808. BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzDate, TTzDate(TAutoMap<TResource<TMResourceName>>)) {
  809. auto& builder = valueBuilder->GetDateBuilder();
  810. auto& storage = Reference(args[0]);
  811. try {
  812. TUnboxedValuePod result(storage.ToDate(builder, true));
  813. result.SetTimezoneId(storage.TimezoneId);
  814. return result;
  815. } catch (const std::exception& e) {
  816. UdfTerminate((TStringBuilder() << Pos_ << "Timestamp "
  817. << storage.ToString()
  818. << " cannot be casted to TzDate"
  819. ).data());
  820. }
  821. }
  822. END_SIMPLE_ARROW_UDF(TMakeTzDate, TMakeDateKernelExec<TTzDate>::Do);
  823. BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzDatetime, TTzDatetime(TAutoMap<TResource<TMResourceName>>)) {
  824. auto& builder = valueBuilder->GetDateBuilder();
  825. auto& storage = Reference(args[0]);
  826. TUnboxedValuePod result(storage.ToDatetime(builder));
  827. result.SetTimezoneId(storage.TimezoneId);
  828. return result;
  829. }
  830. END_SIMPLE_ARROW_UDF(TMakeTzDatetime, TMakeDateKernelExec<TTzDatetime>::Do);
  831. BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzTimestamp, TTzTimestamp(TAutoMap<TResource<TMResourceName>>)) {
  832. auto& builder = valueBuilder->GetDateBuilder();
  833. auto& storage = Reference(args[0]);
  834. TUnboxedValuePod result(storage.ToTimestamp(builder));
  835. result.SetTimezoneId(storage.TimezoneId);
  836. return result;
  837. }
  838. END_SIMPLE_ARROW_UDF(TMakeTzTimestamp, TMakeDateKernelExec<TTzTimestamp>::Do);
  839. SIMPLE_STRICT_UDF(TConvert, TResource<TM64ResourceName>(TAutoMap<TResource<TMResourceName>>)) {
  840. Y_UNUSED(valueBuilder);
  841. TUnboxedValuePod result(0);
  842. auto& arg = Reference(args[0]);
  843. auto& storage = Reference64(result);
  844. storage.From(arg);
  845. return result;
  846. }
  847. SIMPLE_STRICT_UDF(TMakeDate32, TDate32(TAutoMap<TResource<TM64ResourceName>>)) {
  848. auto& storage = Reference64(args[0]);
  849. return TUnboxedValuePod(storage.ToDate32(valueBuilder->GetDateBuilder()));
  850. }
  851. SIMPLE_STRICT_UDF(TMakeDatetime64, TDatetime64(TAutoMap<TResource<TM64ResourceName>>)) {
  852. auto& storage = Reference64(args[0]);
  853. return TUnboxedValuePod(storage.ToDatetime64(valueBuilder->GetDateBuilder()));
  854. }
  855. SIMPLE_STRICT_UDF(TMakeTimestamp64, TTimestamp64(TAutoMap<TResource<TM64ResourceName>>)) {
  856. auto& storage = Reference64(args[0]);
  857. return TUnboxedValuePod(storage.ToTimestamp64(valueBuilder->GetDateBuilder()));
  858. }
  859. // Get*
  860. // #define GET_METHOD(field, type) \
  861. // struct TGet##field##KernelExec : TUnaryKernelExec<TGet##field##KernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<type, false>> { \
  862. // template<typename TSink> \
  863. // static void Process(TBlockItem item, const IValueBuilder& valueBuilder, const TSink& sink) { \
  864. // Y_UNUSED(valueBuilder); \
  865. // sink(TBlockItem(Get##field(item))); \
  866. // } \
  867. // }; \
  868. // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGet##field, type(TAutoMap<TResource<TMResourceName>>)) { \
  869. // Y_UNUSED(valueBuilder); \
  870. // return TUnboxedValuePod(Get##field(args[0])); \
  871. // } \
  872. // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGet##field, TGet##field##KernelExec::Do, arrow::compute::NullHandling::INTERSECTION);
  873. template<const char* TUdfName,
  874. typename TResultType, TResultType (*Accessor)(const TUnboxedValuePod&),
  875. typename TResultWType, TResultWType (*WAccessor)(const TUnboxedValuePod&)>
  876. class TGetDateComponent: public ::NYql::NUdf::TBoxedValue {
  877. public:
  878. typedef bool TTypeAwareMarker;
  879. static const ::NYql::NUdf::TStringRef& Name() {
  880. static auto name = TStringRef(TUdfName, std::strlen(TUdfName));
  881. return name;
  882. }
  883. static bool DeclareSignature(
  884. const ::NYql::NUdf::TStringRef& name,
  885. ::NYql::NUdf::TType* userType,
  886. ::NYql::NUdf::IFunctionTypeInfoBuilder& builder,
  887. bool typesOnly)
  888. {
  889. if (Name() != name) {
  890. return false;
  891. }
  892. if (!userType) {
  893. builder.SetError("User type is missing");
  894. return true;
  895. }
  896. builder.UserType(userType);
  897. const auto typeInfoHelper = builder.TypeInfoHelper();
  898. TTupleTypeInspector tuple(*typeInfoHelper, userType);
  899. Y_ENSURE(tuple, "Tuple with args and options tuples expected");
  900. Y_ENSURE(tuple.GetElementsCount() > 0,
  901. "Tuple has to contain positional arguments");
  902. TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0));
  903. Y_ENSURE(argsTuple, "Tuple with args expected");
  904. if (argsTuple.GetElementsCount() != 1) {
  905. builder.SetError("Single argument expected");
  906. return true;
  907. }
  908. auto argType = argsTuple.GetElementType(0);
  909. if (const auto optType = TOptionalTypeInspector(*typeInfoHelper, argType)) {
  910. argType = optType.GetItemType();
  911. }
  912. TResourceTypeInspector resource(*typeInfoHelper, argType);
  913. if (!resource) {
  914. TDataTypeInspector data(*typeInfoHelper, argType);
  915. if (!data) {
  916. builder.SetError("Data type expected");
  917. return true;
  918. }
  919. const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features;
  920. if (features & NUdf::BigDateType) {
  921. BuildSignature<TResultWType, TM64ResourceName, WAccessor>(builder, typesOnly);
  922. return true;
  923. }
  924. if (features & (NUdf::DateType | NUdf::TzDateType)) {
  925. BuildSignature<TResultType, TMResourceName, Accessor>(builder, typesOnly);
  926. return true;
  927. }
  928. ::TStringBuilder sb;
  929. sb << "Invalid argument type: got ";
  930. TTypePrinter(*typeInfoHelper, argType).Out(sb.Out);
  931. sb << ", but Resource<" << TMResourceName <<"> or Resource<"
  932. << TM64ResourceName << "> expected";
  933. builder.SetError(sb);
  934. return true;
  935. }
  936. if (resource.GetTag() == TStringRef::Of(TM64ResourceName)) {
  937. BuildSignature<TResultWType, TM64ResourceName, WAccessor>(builder, typesOnly);
  938. return true;
  939. }
  940. if (resource.GetTag() == TStringRef::Of(TMResourceName)) {
  941. BuildSignature<TResultType, TMResourceName, Accessor>(builder, typesOnly);
  942. return true;
  943. }
  944. builder.SetError("Unexpected Resource tag");
  945. return true;
  946. }
  947. private:
  948. template<typename TResult, TResult (*Func)(const TUnboxedValuePod&)>
  949. class TImpl : public TBoxedValue {
  950. public:
  951. TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final {
  952. Y_UNUSED(valueBuilder);
  953. EMPTY_RESULT_ON_EMPTY_ARG(0);
  954. return TUnboxedValuePod(TResult(Func(args[0])));
  955. }
  956. };
  957. template<typename TResult, const char* TResourceName, TResult (*Func)(const TUnboxedValuePod&)>
  958. static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) {
  959. builder.Returns<TResult>();
  960. builder.Args()->Add<TAutoMap<TResource<TResourceName>>>();
  961. builder.IsStrict();
  962. if (!typesOnly) {
  963. builder.Implementation(new TImpl<TResult, Func>());
  964. }
  965. }
  966. };
  967. // template<typename TValue>
  968. // TValue GetMonthNameValue(size_t idx) {
  969. // static const std::array<TValue, 12U> monthNames = {{
  970. // TValue::Embedded(TStringRef::Of("January")),
  971. // TValue::Embedded(TStringRef::Of("February")),
  972. // TValue::Embedded(TStringRef::Of("March")),
  973. // TValue::Embedded(TStringRef::Of("April")),
  974. // TValue::Embedded(TStringRef::Of("May")),
  975. // TValue::Embedded(TStringRef::Of("June")),
  976. // TValue::Embedded(TStringRef::Of("July")),
  977. // TValue::Embedded(TStringRef::Of("August")),
  978. // TValue::Embedded(TStringRef::Of("September")),
  979. // TValue::Embedded(TStringRef::Of("October")),
  980. // TValue::Embedded(TStringRef::Of("November")),
  981. // TValue::Embedded(TStringRef::Of("December"))
  982. // }};
  983. // return monthNames.at(idx);
  984. // }
  985. // struct TGetMonthNameKernelExec : TUnaryKernelExec<TGetMonthNameKernelExec, TReaderTraits::TResource<true>, TStringArrayBuilder<arrow::StringType, false>> {
  986. // template<typename TSink>
  987. // static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
  988. // Y_UNUSED(valueBuilder);
  989. // sink(GetMonthNameValue<TBlockItem>(GetMonth(item) - 1U));
  990. // }
  991. // };
  992. // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetMonthName, char*(TAutoMap<TResource<TMResourceName>>)) {
  993. // Y_UNUSED(valueBuilder);
  994. // return GetMonthNameValue<TUnboxedValue>(GetMonth(*args) - 1U);
  995. // }
  996. // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetMonthName, TGetMonthNameKernelExec::Do, arrow::compute::NullHandling::INTERSECTION);
  997. SIMPLE_STRICT_UDF(TGetMonthName, char*(TAutoMap<TResource<TMResourceName>>)) {
  998. Y_UNUSED(valueBuilder);
  999. static const std::array<TUnboxedValue, 12U> monthNames = {{
  1000. TUnboxedValuePod::Embedded(TStringRef::Of("January")),
  1001. TUnboxedValuePod::Embedded(TStringRef::Of("February")),
  1002. TUnboxedValuePod::Embedded(TStringRef::Of("March")),
  1003. TUnboxedValuePod::Embedded(TStringRef::Of("April")),
  1004. TUnboxedValuePod::Embedded(TStringRef::Of("May")),
  1005. TUnboxedValuePod::Embedded(TStringRef::Of("June")),
  1006. TUnboxedValuePod::Embedded(TStringRef::Of("July")),
  1007. TUnboxedValuePod::Embedded(TStringRef::Of("August")),
  1008. TUnboxedValuePod::Embedded(TStringRef::Of("September")),
  1009. TUnboxedValuePod::Embedded(TStringRef::Of("October")),
  1010. TUnboxedValuePod::Embedded(TStringRef::Of("November")),
  1011. TUnboxedValuePod::Embedded(TStringRef::Of("December"))
  1012. }};
  1013. return monthNames.at(GetMonth(*args) - 1U);
  1014. }
  1015. // struct TGetDayOfMonthKernelExec : TUnaryKernelExec<TGetMonthNameKernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<ui8, false>> {
  1016. // template<typename TSink>
  1017. // static void Process(TBlockItem item, const TSink& sink) {
  1018. // sink(GetDay(item));
  1019. // }
  1020. // };
  1021. // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetDayOfMonth, ui8(TAutoMap<TResource<TMResourceName>>)) {
  1022. // Y_UNUSED(valueBuilder);
  1023. // return TUnboxedValuePod(GetDay(args[0]));
  1024. // }
  1025. // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetDayOfMonth, TGetDayOfMonthKernelExec::Do, arrow::compute::NullHandling::INTERSECTION);
  1026. template<typename TValue>
  1027. TValue GetDayNameValue(size_t idx) {
  1028. static const std::array<TValue, 7U> dayNames = {{
  1029. TValue::Embedded(TStringRef::Of("Monday")),
  1030. TValue::Embedded(TStringRef::Of("Tuesday")),
  1031. TValue::Embedded(TStringRef::Of("Wednesday")),
  1032. TValue::Embedded(TStringRef::Of("Thursday")),
  1033. TValue::Embedded(TStringRef::Of("Friday")),
  1034. TValue::Embedded(TStringRef::Of("Saturday")),
  1035. TValue::Embedded(TStringRef::Of("Sunday"))
  1036. }};
  1037. return dayNames.at(idx);
  1038. }
  1039. SIMPLE_STRICT_UDF(TGetDayOfWeekName, char*(TAutoMap<TResource<TMResourceName>>)) {
  1040. Y_UNUSED(valueBuilder);
  1041. return GetDayNameValue<TUnboxedValuePod>(GetDayOfWeek(*args) - 1U);
  1042. }
  1043. // struct TGetDayOfWeekNameKernelExec : TUnaryKernelExec<TGetDayOfWeekNameKernelExec, TReaderTraits::TResource<true>, TStringArrayBuilder<arrow::StringType, false>> {
  1044. // template<typename TSink>
  1045. // static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
  1046. // Y_UNUSED(valueBuilder);
  1047. // sink(GetDayNameValue<TBlockItem>(GetDayOfWeek(item) - 1U));
  1048. // }
  1049. // };
  1050. // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetDayOfWeekName, char*(TAutoMap<TResource<TMResourceName>>)) {
  1051. // Y_UNUSED(valueBuilder);
  1052. // return GetDayNameValue<TUnboxedValuePod>(GetDayOfWeek(*args) - 1U);
  1053. // }
  1054. // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetDayOfWeekName, TGetDayOfWeekNameKernelExec::Do, arrow::compute::NullHandling::INTERSECTION);
  1055. struct TTGetTimezoneNameKernelExec : TUnaryKernelExec<TTGetTimezoneNameKernelExec, TReaderTraits::TResource<false>, TStringArrayBuilder<arrow::BinaryType, false>> {
  1056. template<typename TSink>
  1057. static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
  1058. Y_UNUSED(valueBuilder);
  1059. auto timezoneId = GetTimezoneId(item);
  1060. if (timezoneId >= NUdf::GetTimezones().size()) {
  1061. sink(TBlockItem{});
  1062. } else {
  1063. sink(TBlockItem{NUdf::GetTimezones()[timezoneId]});
  1064. }
  1065. }
  1066. };
  1067. BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetTimezoneName, char*(TAutoMap<TResource<TMResourceName>>)) {
  1068. auto timezoneId = GetTimezoneId(args[0]);
  1069. if (timezoneId >= NUdf::GetTimezones().size()) {
  1070. return TUnboxedValuePod();
  1071. }
  1072. return valueBuilder->NewString(NUdf::GetTimezones()[timezoneId]);
  1073. }
  1074. END_SIMPLE_ARROW_UDF(TGetTimezoneName, TTGetTimezoneNameKernelExec::Do);
  1075. // Update
  1076. class TUpdate : public TBoxedValue {
  1077. const TSourcePosition Pos_;
  1078. public:
  1079. explicit TUpdate(TSourcePosition pos)
  1080. : Pos_(pos)
  1081. {}
  1082. TUnboxedValue Run(
  1083. const IValueBuilder* valueBuilder,
  1084. const TUnboxedValuePod* args) const override
  1085. {
  1086. try {
  1087. EMPTY_RESULT_ON_EMPTY_ARG(0);
  1088. auto result = args[0];
  1089. if (args[1]) {
  1090. auto year = args[1].Get<ui16>();
  1091. if (!ValidateYear(year)) {
  1092. return TUnboxedValuePod();
  1093. }
  1094. SetYear(result, year);
  1095. }
  1096. if (args[2]) {
  1097. auto month = args[2].Get<ui8>();
  1098. if (!ValidateMonth(month)) {
  1099. return TUnboxedValuePod();
  1100. }
  1101. SetMonth(result, month);
  1102. }
  1103. if (args[3]) {
  1104. auto day = args[3].Get<ui8>();
  1105. if (!ValidateDay(day)) {
  1106. return TUnboxedValuePod();
  1107. }
  1108. SetDay(result, day);
  1109. }
  1110. if (args[4]) {
  1111. auto hour = args[4].Get<ui8>();
  1112. if (!ValidateHour(hour)) {
  1113. return TUnboxedValuePod();
  1114. }
  1115. SetHour(result, hour);
  1116. }
  1117. if (args[5]) {
  1118. auto minute = args[5].Get<ui8>();
  1119. if (!ValidateMinute(minute)) {
  1120. return TUnboxedValuePod();
  1121. }
  1122. SetMinute(result, minute);
  1123. }
  1124. if (args[6]) {
  1125. auto second = args[6].Get<ui8>();
  1126. if (!ValidateSecond(second)) {
  1127. return TUnboxedValuePod();
  1128. }
  1129. SetSecond(result, second);
  1130. }
  1131. if (args[7]) {
  1132. auto microsecond = args[7].Get<ui32>();
  1133. if (!ValidateMicrosecond(microsecond)) {
  1134. return TUnboxedValuePod();
  1135. }
  1136. SetMicrosecond(result, microsecond);
  1137. }
  1138. if (args[8]) {
  1139. auto timezoneId = args[8].Get<ui16>();
  1140. if (!ValidateTimezoneId(timezoneId)) {
  1141. return TUnboxedValuePod();
  1142. }
  1143. SetTimezoneId(result, timezoneId);
  1144. }
  1145. auto& builder = valueBuilder->GetDateBuilder();
  1146. auto& storage = Reference(result);
  1147. if (!storage.Validate(builder)) {
  1148. return TUnboxedValuePod();
  1149. }
  1150. return result;
  1151. } catch (const std::exception& e) {
  1152. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  1153. }
  1154. }
  1155. static const TStringRef& Name() {
  1156. static auto name = TStringRef::Of("Update");
  1157. return name;
  1158. }
  1159. static bool DeclareSignature(
  1160. const TStringRef& name,
  1161. TType*,
  1162. IFunctionTypeInfoBuilder& builder,
  1163. bool typesOnly)
  1164. {
  1165. if (Name() != name) {
  1166. return false;
  1167. }
  1168. auto resourceType = builder.Resource(TMResourceName);
  1169. auto optionalResourceType = builder.Optional()->Item(resourceType).Build();
  1170. builder.OptionalArgs(8).Args()->Add(resourceType).Flags(ICallablePayload::TArgumentFlags::AutoMap)
  1171. .Add(builder.Optional()->Item<ui16>().Build()).Name("Year")
  1172. .Add(builder.Optional()->Item<ui8>().Build()).Name("Month")
  1173. .Add(builder.Optional()->Item<ui8>().Build()).Name("Day")
  1174. .Add(builder.Optional()->Item<ui8>().Build()).Name("Hour")
  1175. .Add(builder.Optional()->Item<ui8>().Build()).Name("Minute")
  1176. .Add(builder.Optional()->Item<ui8>().Build()).Name("Second")
  1177. .Add(builder.Optional()->Item<ui32>().Build()).Name("Microsecond")
  1178. .Add(builder.Optional()->Item<ui16>().Build()).Name("TimezoneId");
  1179. builder.Returns(optionalResourceType);
  1180. if (!typesOnly) {
  1181. builder.Implementation(new TUpdate(builder.GetSourcePosition()));
  1182. }
  1183. builder.IsStrict();
  1184. return true;
  1185. }
  1186. };
  1187. // From*
  1188. template<typename TInput, typename TOutput, i64 UsecMultiplier>
  1189. inline TUnboxedValuePod TFromConverter(TInput arg) {
  1190. using TLayout = TDataType<TOutput>::TLayout;
  1191. const TLayout usec = TLayout(arg) * UsecMultiplier;
  1192. return Validate<TOutput>(usec) ? TUnboxedValuePod(usec) : TUnboxedValuePod();
  1193. }
  1194. template<typename TInput, typename TOutput, i64 UsecMultiplier>
  1195. using TFromConverterKernel = TUnaryUnsafeFixedSizeFilterKernel<TInput,
  1196. typename TDataType<TOutput>::TLayout, [] (TInput arg) {
  1197. using TLayout = TDataType<TOutput>::TLayout;
  1198. const TLayout usec = TLayout(arg) * UsecMultiplier;
  1199. return std::make_pair(usec, Validate<TOutput>(usec));
  1200. }>;
  // Declares UDF T<name> with signature Optional<retType>(AutoMap<argType>).
  // The scalar path calls TFromConverter<argType, retType, usecMultiplier>; the Arrow
  // block path uses TFromConverterKernel with the same template arguments.
  #define DATETIME_FROM_CONVERTER_UDF(name, retType, argType, usecMultiplier)                      \
      BEGIN_SIMPLE_STRICT_ARROW_UDF(T##name, TOptional<retType>(TAutoMap<argType>)) {              \
          Y_UNUSED(valueBuilder);                                                                  \
          const auto input = args[0].Get<argType>();                                               \
          return TFromConverter<argType, retType, usecMultiplier>(input);                          \
      }                                                                                            \
      END_SIMPLE_ARROW_UDF(T##name, (TFromConverterKernel<argType, retType, usecMultiplier>::Do))
  1208. DATETIME_FROM_CONVERTER_UDF(FromSeconds, TTimestamp, ui32, UsecondsInSecond);
  1209. DATETIME_FROM_CONVERTER_UDF(FromMilliseconds, TTimestamp, ui64, UsecondsInMilliseconds);
  1210. DATETIME_FROM_CONVERTER_UDF(FromMicroseconds, TTimestamp, ui64, 1);
  1211. DATETIME_FROM_CONVERTER_UDF(FromSeconds64, TTimestamp64, i64, UsecondsInSecond);
  1212. DATETIME_FROM_CONVERTER_UDF(FromMilliseconds64, TTimestamp64, i64, UsecondsInMilliseconds);
  1213. DATETIME_FROM_CONVERTER_UDF(FromMicroseconds64, TTimestamp64, i64, 1);
  1214. DATETIME_FROM_CONVERTER_UDF(IntervalFromDays, TInterval, i32, UsecondsInDay);
  1215. DATETIME_FROM_CONVERTER_UDF(IntervalFromHours, TInterval, i32, UsecondsInHour);
  1216. DATETIME_FROM_CONVERTER_UDF(IntervalFromMinutes, TInterval, i32, UsecondsInMinute);
  1217. DATETIME_FROM_CONVERTER_UDF(IntervalFromSeconds, TInterval, i32, UsecondsInSecond);
  1218. DATETIME_FROM_CONVERTER_UDF(IntervalFromMilliseconds, TInterval, i64, UsecondsInMilliseconds);
  1219. DATETIME_FROM_CONVERTER_UDF(IntervalFromMicroseconds, TInterval, i64, 1);
  1220. DATETIME_FROM_CONVERTER_UDF(Interval64FromDays, TInterval64, i32, UsecondsInDay);
  1221. DATETIME_FROM_CONVERTER_UDF(Interval64FromHours, TInterval64, i64, UsecondsInHour);
  1222. DATETIME_FROM_CONVERTER_UDF(Interval64FromMinutes, TInterval64, i64, UsecondsInMinute);
  1223. DATETIME_FROM_CONVERTER_UDF(Interval64FromSeconds, TInterval64, i64, UsecondsInSecond);
  1224. DATETIME_FROM_CONVERTER_UDF(Interval64FromMilliseconds, TInterval64, i64, UsecondsInMilliseconds);
  1225. DATETIME_FROM_CONVERTER_UDF(Interval64FromMicroseconds, TInterval64, i64, 1);
  1226. // To*
  1227. BEGIN_SIMPLE_STRICT_ARROW_UDF(TToDays, i32(TAutoMap<TInterval>)) {
  1228. Y_UNUSED(valueBuilder);
  1229. return TUnboxedValuePod(i32(args[0].Get<i64>() / UsecondsInDay));
  1230. }
  1231. END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TToDays,
  1232. (UnaryPreallocatedExecImpl<i64, i32, [] (i64 arg) { return i32(arg / UsecondsInDay); }>),
  1233. arrow::compute::NullHandling::INTERSECTION);
  1234. BEGIN_SIMPLE_STRICT_ARROW_UDF(TToHours, i32(TAutoMap<TInterval>)) {
  1235. Y_UNUSED(valueBuilder);
  1236. return TUnboxedValuePod(i32(args[0].Get<i64>() / UsecondsInHour));
  1237. }
  1238. END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TToHours,
  1239. (UnaryPreallocatedExecImpl<i64, i32, [] (i64 arg) { return i32(arg / UsecondsInHour); }>),
  1240. arrow::compute::NullHandling::INTERSECTION);
  1241. BEGIN_SIMPLE_STRICT_ARROW_UDF(TToMinutes, i32(TAutoMap<TInterval>)) {
  1242. Y_UNUSED(valueBuilder);
  1243. return TUnboxedValuePod(i32(args[0].Get<i64>() / UsecondsInMinute));
  1244. }
  1245. END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TToMinutes,
  1246. (UnaryPreallocatedExecImpl<i64, i32, [] (i64 arg) { return i32(arg / UsecondsInMinute); }>),
  1247. arrow::compute::NullHandling::INTERSECTION);
  1248. // StartOf*
  1249. template<auto Core>
  1250. struct TStartOfKernelExec : TUnaryKernelExec<TStartOfKernelExec<Core>, TResourceBlockReader<false>, TResourceArrayBuilder<true>> {
  1251. template<typename TSink>
  1252. static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
  1253. if (auto res = Core(Reference(item), *valueBuilder)) {
  1254. Reference(item) = res.GetRef();
  1255. sink(item);
  1256. } else {
  1257. sink(TBlockItem{});
  1258. }
  1259. }
  1260. };
  1261. template<auto Core>
  1262. TUnboxedValue SimpleDatetimeToDatetimeUdf(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) {
  1263. auto result = args[0];
  1264. auto& storage = Reference(result);
  1265. if (auto res = Core(storage, *valueBuilder)) {
  1266. storage = res.GetRef();
  1267. return result;
  1268. }
  1269. return TUnboxedValuePod{};
  1270. }
  1271. TMaybe<TTMStorage> StartOfYear(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1272. storage.Month = 1;
  1273. storage.Day = 1;
  1274. storage.Hour = 0;
  1275. storage.Minute = 0;
  1276. storage.Second = 0;
  1277. storage.Microsecond = 0;
  1278. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1279. return {};
  1280. }
  1281. return storage;
  1282. }
  1283. BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfYear, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
  1284. return SimpleDatetimeToDatetimeUdf<StartOfYear>(valueBuilder, args);
  1285. }
  1286. END_SIMPLE_ARROW_UDF(TStartOfYear, TStartOfKernelExec<StartOfYear>::Do);
  1287. void SetEndOfDay(TTMStorage& storage) {
  1288. storage.Hour = 23;
  1289. storage.Minute = 59;
  1290. storage.Second = 59;
  1291. storage.Microsecond = 999999;
  1292. }
  1293. TMaybe<TTMStorage> EndOfYear(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1294. storage.Month = 12;
  1295. storage.Day = 31;
  1296. SetEndOfDay(storage);
  1297. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1298. return {};
  1299. }
  1300. return storage;
  1301. }
  1302. BEGIN_SIMPLE_STRICT_ARROW_UDF(TEndOfYear, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
  1303. return SimpleDatetimeToDatetimeUdf<EndOfYear>(valueBuilder, args);
  1304. }
  1305. END_SIMPLE_ARROW_UDF(TEndOfYear, TStartOfKernelExec<EndOfYear>::Do);
  1306. TMaybe<TTMStorage> StartOfQuarter(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1307. storage.Month = (storage.Month - 1) / 3 * 3 + 1;
  1308. storage.Day = 1;
  1309. storage.Hour = 0;
  1310. storage.Minute = 0;
  1311. storage.Second = 0;
  1312. storage.Microsecond = 0;
  1313. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1314. return {};
  1315. }
  1316. return storage;
  1317. }
  1318. BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfQuarter, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
  1319. return SimpleDatetimeToDatetimeUdf<StartOfQuarter>(valueBuilder, args);
  1320. }
  1321. END_SIMPLE_ARROW_UDF(TStartOfQuarter, TStartOfKernelExec<StartOfQuarter>::Do);
  1322. TMaybe<TTMStorage> EndOfQuarter(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1323. storage.Month = ((storage.Month - 1) / 3 + 1) * 3;
  1324. storage.Day = NMiniKQL::GetMonthLength(storage.Month, NMiniKQL::IsLeapYear(storage.Year));
  1325. SetEndOfDay(storage);
  1326. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1327. return {};
  1328. }
  1329. return storage;
  1330. }
  1331. BEGIN_SIMPLE_STRICT_ARROW_UDF(TEndOfQuarter, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
  1332. return SimpleDatetimeToDatetimeUdf<EndOfQuarter>(valueBuilder, args);
  1333. }
  1334. END_SIMPLE_ARROW_UDF(TEndOfQuarter, TStartOfKernelExec<EndOfQuarter>::Do);
  1335. TMaybe<TTMStorage> StartOfMonth(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1336. storage.Day = 1;
  1337. storage.Hour = 0;
  1338. storage.Minute = 0;
  1339. storage.Second = 0;
  1340. storage.Microsecond = 0;
  1341. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1342. return {};
  1343. }
  1344. return storage;
  1345. }
  1346. BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfMonth, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
  1347. return SimpleDatetimeToDatetimeUdf<StartOfMonth>(valueBuilder, args);
  1348. }
  1349. END_SIMPLE_ARROW_UDF(TStartOfMonth, TStartOfKernelExec<StartOfMonth>::Do);
  1350. TMaybe<TTMStorage> EndOfMonth(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1351. storage.Day = NMiniKQL::GetMonthLength(storage.Month, NMiniKQL::IsLeapYear(storage.Year));
  1352. SetEndOfDay(storage);
  1353. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1354. return {};
  1355. }
  1356. return storage;
  1357. }
  1358. BEGIN_SIMPLE_STRICT_ARROW_UDF(TEndOfMonth, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
  1359. return SimpleDatetimeToDatetimeUdf<EndOfMonth>(valueBuilder, args);
  1360. }
  1361. END_SIMPLE_ARROW_UDF(TEndOfMonth, TStartOfKernelExec<EndOfMonth>::Do);
  1362. TMaybe<TTMStorage> StartOfWeek(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1363. const ui32 shift = 86400u * (storage.DayOfWeek - 1u);
  1364. if (shift > storage.ToDatetime(valueBuilder.GetDateBuilder())) {
  1365. return {};
  1366. }
  1367. storage.FromDatetime(valueBuilder.GetDateBuilder(), storage.ToDatetime(valueBuilder.GetDateBuilder()) - shift, storage.TimezoneId);
  1368. storage.Hour = 0;
  1369. storage.Minute = 0;
  1370. storage.Second = 0;
  1371. storage.Microsecond = 0;
  1372. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1373. return {};
  1374. }
  1375. return storage;
  1376. }
  1377. BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfWeek, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
  1378. return SimpleDatetimeToDatetimeUdf<StartOfWeek>(valueBuilder, args);
  1379. }
  1380. END_SIMPLE_ARROW_UDF(TStartOfWeek, TStartOfKernelExec<StartOfWeek>::Do);
  1381. TMaybe<TTMStorage> EndOfWeek(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1382. const ui32 shift = 86400u * (7u - storage.DayOfWeek);
  1383. auto dt = storage.ToDatetime(valueBuilder.GetDateBuilder());
  1384. if (NUdf::MAX_DATETIME - shift <= dt) {
  1385. return {};
  1386. }
  1387. storage.FromDatetime(valueBuilder.GetDateBuilder(), dt + shift, storage.TimezoneId);
  1388. SetEndOfDay(storage);
  1389. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1390. return {};
  1391. }
  1392. return storage;
  1393. }
  1394. BEGIN_SIMPLE_STRICT_ARROW_UDF(TEndOfWeek, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
  1395. return SimpleDatetimeToDatetimeUdf<EndOfWeek>(valueBuilder, args);
  1396. }
  1397. END_SIMPLE_ARROW_UDF(TEndOfWeek, TStartOfKernelExec<EndOfWeek>::Do);
  1398. TMaybe<TTMStorage> StartOfDay(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1399. storage.Hour = 0;
  1400. storage.Minute = 0;
  1401. storage.Second = 0;
  1402. storage.Microsecond = 0;
  1403. auto& builder = valueBuilder.GetDateBuilder();
  1404. if (!storage.Validate(builder)) {
  1405. return {};
  1406. }
  1407. return storage;
  1408. }
  1409. BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfDay, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
  1410. return SimpleDatetimeToDatetimeUdf<StartOfDay>(valueBuilder, args);
  1411. }
  1412. END_SIMPLE_ARROW_UDF(TStartOfDay, TStartOfKernelExec<StartOfDay>::Do);
  1413. TMaybe<TTMStorage> EndOfDay(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1414. SetEndOfDay(storage);
  1415. auto& builder = valueBuilder.GetDateBuilder();
  1416. if (!storage.Validate(builder)) {
  1417. return {};
  1418. }
  1419. return storage;
  1420. }
  1421. BEGIN_SIMPLE_STRICT_ARROW_UDF(TEndOfDay, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
  1422. return SimpleDatetimeToDatetimeUdf<EndOfDay>(valueBuilder, args);
  1423. }
  1424. END_SIMPLE_ARROW_UDF(TEndOfDay, TStartOfKernelExec<EndOfDay>::Do);
  1425. TMaybe<TTMStorage> StartOf(TTMStorage storage, ui64 interval, const IValueBuilder& valueBuilder) {
  1426. if (interval >= 86400000000ull) {
  1427. // treat as StartOfDay
  1428. storage.Hour = 0;
  1429. storage.Minute = 0;
  1430. storage.Second = 0;
  1431. storage.Microsecond = 0;
  1432. } else {
  1433. auto current = storage.ToTimeOfDay();
  1434. auto rounded = current / interval * interval;
  1435. storage.FromTimeOfDay(rounded);
  1436. }
  1437. auto& builder = valueBuilder.GetDateBuilder();
  1438. if (!storage.Validate(builder)) {
  1439. return {};
  1440. }
  1441. return storage;
  1442. }
  1443. TMaybe<TTMStorage> EndOf(TTMStorage storage, ui64 interval, const IValueBuilder& valueBuilder) {
  1444. if (interval >= 86400000000ull) {
  1445. // treat as EndOfDay
  1446. SetEndOfDay(storage);
  1447. } else {
  1448. auto current = storage.ToTimeOfDay();
  1449. auto rounded = current / interval * (interval + 1) - 1;
  1450. storage.FromTimeOfDay(rounded);
  1451. }
  1452. auto& builder = valueBuilder.GetDateBuilder();
  1453. if (!storage.Validate(builder)) {
  1454. return {};
  1455. }
  1456. return storage;
  1457. }
  1458. template<bool UseEnd>
  1459. struct TStartEndOfBinaryKernelExec : TBinaryKernelExec<TStartEndOfBinaryKernelExec<UseEnd>> {
  1460. template<typename TSink>
  1461. static void Process(const IValueBuilder* valueBuilder, TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
  1462. auto& storage = Reference(arg1);
  1463. ui64 interval = std::abs(arg2.Get<i64>());
  1464. if (interval == 0) {
  1465. sink(arg1);
  1466. return;
  1467. }
  1468. if (auto res = (UseEnd ? EndOf : StartOf)(storage, interval, *valueBuilder)) {
  1469. storage = res.GetRef();
  1470. sink(arg1);
  1471. } else {
  1472. sink(TBlockItem{});
  1473. }
  1474. }
  1475. };
  1476. BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOf, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>, TAutoMap<TInterval>)) {
  1477. auto result = args[0];
  1478. ui64 interval = std::abs(args[1].Get<i64>());
  1479. if (interval == 0) {
  1480. return result;
  1481. }
  1482. if (auto res = StartOf(Reference(result), interval, *valueBuilder)) {
  1483. Reference(result) = res.GetRef();
  1484. return result;
  1485. }
  1486. return TUnboxedValuePod{};
  1487. }
  1488. END_SIMPLE_ARROW_UDF(TStartOf, TStartEndOfBinaryKernelExec<false>::Do);
  1489. BEGIN_SIMPLE_STRICT_ARROW_UDF(TEndOf, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>, TAutoMap<TInterval>)) {
  1490. auto result = args[0];
  1491. ui64 interval = std::abs(args[1].Get<i64>());
  1492. if (interval == 0) {
  1493. return result;
  1494. }
  1495. if (auto res = EndOf(Reference(result), interval, *valueBuilder)) {
  1496. Reference(result) = res.GetRef();
  1497. return result;
  1498. }
  1499. return TUnboxedValuePod{};
  1500. }
  1501. END_SIMPLE_ARROW_UDF(TEndOf, TStartEndOfBinaryKernelExec<true>::Do);
  1502. struct TTimeOfDayKernelExec : TUnaryKernelExec<TTimeOfDayKernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<TDataType<TInterval>::TLayout, false>> {
  1503. template<typename TSink>
  1504. static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
  1505. Y_UNUSED(valueBuilder);
  1506. auto& storage = Reference(item);
  1507. sink(TBlockItem{(TDataType<TInterval>::TLayout)storage.ToTimeOfDay()});
  1508. }
  1509. };
  1510. const auto timeOfDayKernelExecDo = TTimeOfDayKernelExec::Do;
  1511. BEGIN_SIMPLE_STRICT_ARROW_UDF(TTimeOfDay, TInterval(TAutoMap<TResource<TMResourceName>>)) {
  1512. Y_UNUSED(valueBuilder);
  1513. auto& storage = Reference(args[0]);
  1514. return TUnboxedValuePod((i64)storage.ToTimeOfDay());
  1515. }
  1516. END_SIMPLE_ARROW_UDF(TTimeOfDay, timeOfDayKernelExecDo);
  1517. // Add ...
  1518. template<auto Core>
  1519. struct TAddKernelExec : TBinaryKernelExec<TAddKernelExec<Core>> {
  1520. template<typename TSink>
  1521. static void Process(const IValueBuilder* valueBuilder, TBlockItem date, TBlockItem arg, const TSink& sink) {
  1522. sink(Core(date, arg.Get<i32>(), valueBuilder->GetDateBuilder()));
  1523. }
  1524. };
  1525. BEGIN_SIMPLE_STRICT_ARROW_UDF(TShiftYears, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>, i32)) {
  1526. return DoAddYears(args[0], args[1].Get<i32>(), valueBuilder->GetDateBuilder());
  1527. }
  1528. END_SIMPLE_ARROW_UDF(TShiftYears, TAddKernelExec<DoAddYears<TBlockItem>>::Do);
  1529. BEGIN_SIMPLE_STRICT_ARROW_UDF(TShiftQuarters, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>, i32)) {
  1530. return DoAddQuarters(args[0], args[1].Get<i32>(), valueBuilder->GetDateBuilder());
  1531. }
  1532. END_SIMPLE_ARROW_UDF(TShiftQuarters, TAddKernelExec<DoAddQuarters<TBlockItem>>::Do);
  1533. BEGIN_SIMPLE_STRICT_ARROW_UDF(TShiftMonths, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>, i32)) {
  1534. return DoAddMonths(args[0], args[1].Get<i32>(), valueBuilder->GetDateBuilder());
  1535. }
  1536. END_SIMPLE_ARROW_UDF(TShiftMonths, TAddKernelExec<DoAddMonths<TBlockItem>>::Do);
  1537. template<size_t Digits, bool Exacly = true>
  1538. struct PrintNDigits;
  1539. template<bool Exacly>
  1540. struct PrintNDigits<0U, Exacly> {
  1541. static constexpr ui32 Miltiplier = 1U;
  1542. template <typename T>
  1543. static constexpr size_t Do(T, char*) { return 0U; }
  1544. };
  1545. template<size_t Digits, bool Exacly>
  1546. struct PrintNDigits {
  1547. using TNextPrint = PrintNDigits<Digits - 1U, Exacly>;
  1548. static constexpr ui32 Miltiplier = TNextPrint::Miltiplier * 10U;
  1549. template <typename T>
  1550. static constexpr size_t Do(T in, char* out) {
  1551. in %= Miltiplier;
  1552. if (Exacly || in) {
  1553. *out = "0123456789"[in / TNextPrint::Miltiplier];
  1554. return 1U + TNextPrint::Do(in, ++out);
  1555. }
  1556. return 0U;
  1557. }
  1558. };
  1559. // Format
  1560. class TFormat : public TBoxedValue {
  1561. public:
  1562. explicit TFormat(TSourcePosition pos)
  1563. : Pos_(pos)
  1564. {}
  1565. static const TStringRef& Name() {
  1566. static auto name = TStringRef::Of("Format");
  1567. return name;
  1568. }
  1569. static bool DeclareSignature(
  1570. const TStringRef& name,
  1571. TType*,
  1572. IFunctionTypeInfoBuilder& builder,
  1573. bool typesOnly)
  1574. {
  1575. if (Name() != name) {
  1576. return false;
  1577. }
  1578. auto resourceType = builder.Resource(TMResourceName);
  1579. auto stringType = builder.SimpleType<char*>();
  1580. auto boolType = builder.SimpleType<bool>();
  1581. auto optionalBoolType = builder.Optional()->Item(boolType).Build();
  1582. auto args = builder.Args();
  1583. args->Add(stringType);
  1584. args->Add(optionalBoolType).Name("AlwaysWriteFractionalSeconds");
  1585. args->Done();
  1586. builder.OptionalArgs(1);
  1587. builder.Returns(
  1588. builder.Callable(1)
  1589. ->Returns(stringType)
  1590. .Arg(resourceType)
  1591. .Flags(ICallablePayload::TArgumentFlags::AutoMap)
  1592. .Build()
  1593. );
  1594. if (!typesOnly) {
  1595. builder.Implementation(new TFormat(builder.GetSourcePosition()));
  1596. }
  1597. return true;
  1598. }
  1599. private:
  1600. using TPrintersList = std::vector<std::function<size_t(char*, const TUnboxedValuePod&, const IDateBuilder&)>>;
  1601. struct TDataPrinter {
  1602. const std::string_view Data;
  1603. size_t operator()(char* out, const TUnboxedValuePod&, const IDateBuilder&) const {
  1604. std::memcpy(out, Data.data(), Data.size());
  1605. return Data.size();
  1606. }
  1607. };
  1608. TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try {
  1609. bool alwaysWriteFractionalSeconds = false;
  1610. if (auto val = args[1]) {
  1611. alwaysWriteFractionalSeconds = val.Get<bool>();
  1612. }
  1613. return TUnboxedValuePod(new TImpl(Pos_, args[0], alwaysWriteFractionalSeconds));
  1614. } catch (const std::exception& e) {
  1615. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  1616. }
  1617. class TImpl : public TBoxedValue {
  1618. public:
  1619. TUnboxedValue Run(
  1620. const IValueBuilder* valueBuilder,
  1621. const TUnboxedValuePod* args) const override
  1622. {
  1623. try {
  1624. EMPTY_RESULT_ON_EMPTY_ARG(0);
  1625. const auto value = args[0];
  1626. auto& builder = valueBuilder->GetDateBuilder();
  1627. auto result = valueBuilder->NewStringNotFilled(ReservedSize_);
  1628. auto pos = result.AsStringRef().Data();
  1629. ui32 size = 0U;
  1630. for (const auto& printer : Printers_) {
  1631. if (const auto plus = printer(pos, value, builder)) {
  1632. size += plus;
  1633. pos += plus;
  1634. }
  1635. }
  1636. if (size < ReservedSize_) {
  1637. result = valueBuilder->SubString(result.Release(), 0U, size);
  1638. }
  1639. return result;
  1640. } catch (const std::exception& e) {
  1641. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  1642. }
  1643. }
  1644. TImpl(TSourcePosition pos, TUnboxedValue format, bool alwaysWriteFractionalSeconds)
  1645. : Pos_(pos)
  1646. , Format_(format)
  1647. {
  1648. const std::string_view formatView(Format_.AsStringRef());
  1649. auto dataStart = formatView.begin();
  1650. size_t dataSize = 0U;
  1651. for (auto ptr = formatView.begin(); formatView.end() != ptr; ++ptr) {
  1652. if (*ptr != '%') {
  1653. ++dataSize;
  1654. continue;
  1655. }
  1656. if (dataSize) {
  1657. Printers_.emplace_back(TDataPrinter{std::string_view(&*dataStart, dataSize)});
  1658. ReservedSize_ += dataSize;
  1659. dataSize = 0U;
  1660. }
  1661. if (formatView.end() == ++ptr) {
  1662. ythrow yexception() << "format string ends with single %%";
  1663. }
  1664. switch (*ptr) {
  1665. case '%': {
  1666. static constexpr size_t size = 1;
  1667. Printers_.emplace_back([](char* out, const TUnboxedValuePod&, const IDateBuilder&) {
  1668. *out = '%';
  1669. return size;
  1670. });
  1671. ReservedSize_ += size;
  1672. break;
  1673. }
  1674. case 'Y': {
  1675. static constexpr size_t size = 4;
  1676. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1677. return PrintNDigits<size>::Do(GetYear(value), out);
  1678. });
  1679. ReservedSize_ += size;
  1680. break;
  1681. }
  1682. case 'm': {
  1683. static constexpr size_t size = 2;
  1684. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1685. return PrintNDigits<size>::Do(GetMonth(value), out);
  1686. });
  1687. ReservedSize_ += size;
  1688. break;
  1689. }
  1690. case 'd': {
  1691. static constexpr size_t size = 2;
  1692. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1693. return PrintNDigits<size>::Do(GetDay(value), out);
  1694. });
  1695. ReservedSize_ += size;
  1696. break;
  1697. }
  1698. case 'H': {
  1699. static constexpr size_t size = 2;
  1700. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1701. return PrintNDigits<size>::Do(GetHour(value), out);
  1702. });
  1703. ReservedSize_ += size;
  1704. break;
  1705. }
  1706. case 'M': {
  1707. static constexpr size_t size = 2;
  1708. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1709. return PrintNDigits<size>::Do(GetMinute(value), out);
  1710. });
  1711. ReservedSize_ += size;
  1712. break;
  1713. }
  1714. case 'S':
  1715. Printers_.emplace_back([alwaysWriteFractionalSeconds](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1716. constexpr size_t size = 2;
  1717. if (const auto microsecond = GetMicrosecond(value); microsecond || alwaysWriteFractionalSeconds) {
  1718. out += PrintNDigits<size>::Do(GetSecond(value), out);
  1719. *out++ = '.';
  1720. constexpr size_t msize = 6;
  1721. auto addSz = alwaysWriteFractionalSeconds ?
  1722. PrintNDigits<msize, true>::Do(microsecond, out) :
  1723. PrintNDigits<msize, false>::Do(microsecond, out);
  1724. return size + 1U + addSz;
  1725. }
  1726. return PrintNDigits<size>::Do(GetSecond(value), out);
  1727. });
  1728. ReservedSize_ += 9;
  1729. break;
  1730. case 'z': {
  1731. static constexpr size_t size = 5;
  1732. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder& builder) {
  1733. auto timezoneId = GetTimezoneId(value);
  1734. if (TTMStorage::IsUniversal(timezoneId)) {
  1735. std::memcpy(out, "+0000", size);
  1736. return size;
  1737. }
  1738. i32 shift;
  1739. if (!builder.GetTimezoneShift(GetYear(value), GetMonth(value), GetDay(value),
  1740. GetHour(value), GetMinute(value), GetSecond(value), timezoneId, shift))
  1741. {
  1742. std::memcpy(out, "+0000", size);
  1743. return size;
  1744. }
  1745. *out++ = shift > 0 ? '+' : '-';
  1746. shift = std::abs(shift);
  1747. out += PrintNDigits<2U>::Do(shift / 60U, out);
  1748. out += PrintNDigits<2U>::Do(shift % 60U, out);
  1749. return size;
  1750. });
  1751. ReservedSize_ += size;
  1752. break;
  1753. }
  1754. case 'Z':
  1755. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1756. const auto timezoneId = GetTimezoneId(value);
  1757. const auto tzName = NUdf::GetTimezones()[timezoneId];
  1758. std::memcpy(out, tzName.data(), std::min(tzName.size(), MAX_TIMEZONE_NAME_LEN));
  1759. return tzName.size();
  1760. });
  1761. ReservedSize_ += MAX_TIMEZONE_NAME_LEN;
  1762. break;
  1763. case 'b': {
  1764. static constexpr size_t size = 3;
  1765. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1766. static constexpr std::string_view mp[] {
  1767. "Jan",
  1768. "Feb",
  1769. "Mar",
  1770. "Apr",
  1771. "May",
  1772. "Jun",
  1773. "Jul",
  1774. "Aug",
  1775. "Sep",
  1776. "Oct",
  1777. "Nov",
  1778. "Dec"
  1779. };
  1780. auto month = GetMonth(value);
  1781. Y_ENSURE(month > 0 && month <= sizeof(mp) / sizeof(mp[0]), "Invalid month value");
  1782. std::memcpy(out, mp[month - 1].data(), size);
  1783. return size;
  1784. });
  1785. ReservedSize_ += size;
  1786. break;
  1787. }
  1788. case 'B': {
  1789. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1790. static constexpr std::string_view mp[] {
  1791. "January",
  1792. "February",
  1793. "March",
  1794. "April",
  1795. "May",
  1796. "June",
  1797. "July",
  1798. "August",
  1799. "September",
  1800. "October",
  1801. "November",
  1802. "December"
  1803. };
  1804. auto month = GetMonth(value);
  1805. Y_ENSURE(month > 0 && month <= sizeof(mp) / sizeof(mp[0]), "Invalid month value");
  1806. const std::string_view monthFullName = mp[month - 1];
  1807. std::memcpy(out, monthFullName.data(), monthFullName.size());
  1808. return monthFullName.size();
  1809. });
  1810. ReservedSize_ += 9U; // MAX_MONTH_FULL_NAME_LEN
  1811. break;
  1812. }
  1813. default:
  1814. ythrow yexception() << "invalid format character: " << *ptr;
  1815. }
  1816. dataStart = ptr + 1U;
  1817. }
  1818. if (dataSize) {
  1819. Printers_.emplace_back(TDataPrinter{std::string_view(dataStart, dataSize)});
  1820. ReservedSize_ += dataSize;
  1821. }
  1822. }
  1823. private:
  1824. const TSourcePosition Pos_;
  1825. TUnboxedValue Format_;
  1826. TPrintersList Printers_{};
  1827. size_t ReservedSize_ = 0;
  1828. };
  1829. const TSourcePosition Pos_;
  1830. };
  1831. template<size_t Digits>
  1832. struct ParseExaclyNDigits;
  1833. template<>
  1834. struct ParseExaclyNDigits<0U> {
  1835. template <typename T>
  1836. static constexpr bool Do(std::string_view::const_iterator&, T&) {
  1837. return true;
  1838. }
  1839. };
  1840. template<size_t Digits>
  1841. struct ParseExaclyNDigits {
  1842. template <typename T>
  1843. static constexpr bool Do(std::string_view::const_iterator& it, T& out) {
  1844. const auto d = *it;
  1845. if (!std::isdigit(d)) {
  1846. return false;
  1847. }
  1848. out *= 10U;
  1849. out += d - '0';
  1850. return ParseExaclyNDigits<Digits - 1U>::Do(++it, out);
  1851. }
  1852. };
  1853. // Parse
  1854. class TParse : public TBoxedValue {
  1855. public:
  1856. class TFactory : public TBoxedValue {
  1857. public:
  1858. explicit TFactory(TSourcePosition pos)
  1859. : Pos_(pos)
  1860. {}
  1861. private:
  1862. TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try {
  1863. return TUnboxedValuePod(new TParse(args[0], Pos_));
  1864. } catch (const std::exception& e) {
  1865. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  1866. }
  1867. const TSourcePosition Pos_;
  1868. };
  1869. static const TStringRef& Name() {
  1870. static auto name = TStringRef::Of("Parse");
  1871. return name;
  1872. }
  1873. static bool DeclareSignature(
  1874. const TStringRef& name,
  1875. TType*,
  1876. IFunctionTypeInfoBuilder& builder,
  1877. bool typesOnly)
  1878. {
  1879. if (Name() != name) {
  1880. return false;
  1881. }
  1882. auto resourceType = builder.Resource(TMResourceName);
  1883. auto optionalResourceType = builder.Optional()->Item(resourceType).Build();
  1884. builder.Args()->Add<char*>().Flags(ICallablePayload::TArgumentFlags::AutoMap)
  1885. .Add(builder.Optional()->Item<ui16>())
  1886. .Done()
  1887. .OptionalArgs(1);
  1888. builder.RunConfig<char*>().Returns(optionalResourceType);
  1889. if (!typesOnly) {
  1890. builder.Implementation(new TParse::TFactory(builder.GetSourcePosition()));
  1891. }
  1892. return true;
  1893. }
  1894. private:
  1895. const TSourcePosition Pos_;
  1896. const TUnboxedValue Format_;
  1897. std::vector<std::function<bool(std::string_view::const_iterator& it, size_t, TUnboxedValuePod&, const IDateBuilder&)>> Scanners_;
  1898. struct TDataScanner {
  1899. const std::string_view Data_;
  1900. bool operator()(std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod&, const IDateBuilder&) const {
  1901. if (limit < Data_.size() || !std::equal(Data_.begin(), Data_.end(), it)) {
  1902. return false;
  1903. }
  1904. std::advance(it, Data_.size());
  1905. return true;
  1906. }
  1907. };
  1908. TUnboxedValue Run(
  1909. const IValueBuilder* valueBuilder,
  1910. const TUnboxedValuePod* args) const override
  1911. {
  1912. try {
  1913. EMPTY_RESULT_ON_EMPTY_ARG(0);
  1914. const std::string_view buffer = args[0].AsStringRef();
  1915. TUnboxedValuePod result(0);
  1916. auto& storage = Reference(result);
  1917. storage.MakeDefault();
  1918. auto& builder = valueBuilder->GetDateBuilder();
  1919. auto it = buffer.begin();
  1920. for (const auto& scanner : Scanners_) {
  1921. if (!scanner(it, std::distance(it, buffer.end()), result, builder)) {
  1922. return TUnboxedValuePod();
  1923. }
  1924. }
  1925. if (buffer.end() != it || !storage.Validate(builder)) {
  1926. return TUnboxedValuePod();
  1927. }
  1928. return result;
  1929. } catch (const std::exception& e) {
  1930. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  1931. }
  1932. }
  1933. TParse(const TUnboxedValuePod& runConfig, TSourcePosition pos)
  1934. : Pos_(pos)
  1935. , Format_(runConfig)
  1936. {
  1937. const std::string_view formatView(Format_.AsStringRef());
  1938. auto dataStart = formatView.begin();
  1939. size_t dataSize = 0U;
  1940. for (auto ptr = formatView.begin(); formatView.end() != ptr; ++ptr) {
  1941. if (*ptr != '%') {
  1942. ++dataSize;
  1943. continue;
  1944. }
  1945. if (dataSize) {
  1946. Scanners_.emplace_back(TDataScanner{std::string_view(&*dataStart, dataSize)});
  1947. dataSize = 0;
  1948. }
  1949. if (++ptr == formatView.end()) {
  1950. ythrow yexception() << "format string ends with single %%";
  1951. }
  1952. switch (*ptr) {
  1953. case '%':
  1954. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod&, const IDateBuilder&) {
  1955. return limit > 0U && *it++ == '%';
  1956. });
  1957. break;
  1958. case 'Y': {
  1959. static constexpr size_t size = 4;
  1960. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  1961. ui32 year = 0U;
  1962. if (limit < size || !ParseExaclyNDigits<size>::Do(it, year) || !ValidateYear(year)) {
  1963. return false;
  1964. }
  1965. SetYear(result, year);
  1966. return true;
  1967. });
  1968. break;
  1969. }
  1970. case 'm': {
  1971. static constexpr size_t size = 2;
  1972. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  1973. ui32 month = 0U;
  1974. if (limit < size || !ParseExaclyNDigits<size>::Do(it, month) || !ValidateMonth(month)) {
  1975. return false;
  1976. }
  1977. SetMonth(result, month);
  1978. return true;
  1979. });
  1980. break;
  1981. }
  1982. case 'd': {
  1983. static constexpr size_t size = 2;
  1984. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  1985. ui32 day = 0U;
  1986. if (limit < size || !ParseExaclyNDigits<size>::Do(it, day) || !ValidateDay(day)) {
  1987. return false;
  1988. }
  1989. SetDay(result, day);
  1990. return true;
  1991. });
  1992. break;
  1993. }
  1994. case 'H': {
  1995. static constexpr size_t size = 2;
  1996. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  1997. ui32 hour = 0U;
  1998. if (limit < size || !ParseExaclyNDigits<size>::Do(it, hour) || !ValidateHour(hour)) {
  1999. return false;
  2000. }
  2001. SetHour(result, hour);
  2002. return true;
  2003. });
  2004. break;
  2005. }
  2006. case 'M': {
  2007. static constexpr size_t size = 2;
  2008. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  2009. ui32 minute = 0U;
  2010. if (limit < size || !ParseExaclyNDigits<size>::Do(it, minute) || !ValidateMinute(minute)) {
  2011. return false;
  2012. }
  2013. SetMinute(result, minute);
  2014. return true;
  2015. });
  2016. break;
  2017. }
  2018. case 'S': {
  2019. static constexpr size_t size = 2;
  2020. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  2021. ui32 second = 0U;
  2022. if (limit < size || !ParseExaclyNDigits<size>::Do(it, second) || !ValidateSecond(second)) {
  2023. return false;
  2024. }
  2025. SetSecond(result, second);
  2026. limit -= size;
  2027. if (!limit || *it != '.') {
  2028. return true;
  2029. }
  2030. ++it;
  2031. --limit;
  2032. ui32 usec = 0U;
  2033. size_t digits = 6U;
  2034. for (; limit; --limit) {
  2035. const auto c = *it;
  2036. if (!digits || !std::isdigit(c)) {
  2037. break;
  2038. }
  2039. usec *= 10U;
  2040. usec += c - '0';
  2041. ++it;
  2042. --digits;
  2043. }
  2044. for (; !digits && limit && std::isdigit(*it); --limit, ++it);
  2045. while (digits--) {
  2046. usec *= 10U;
  2047. }
  2048. SetMicrosecond(result, usec);
  2049. return true;
  2050. });
  2051. break;
  2052. }
  2053. case 'Z':
  2054. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder& builder) {
  2055. const auto start = it;
  2056. while (limit > 0 && (std::isalnum(*it) || *it == '/' || *it == '_' || *it == '-' || *it == '+')) {
  2057. ++it;
  2058. --limit;
  2059. }
  2060. const auto size = std::distance(start, it);
  2061. ui32 timezoneId;
  2062. if (!builder.FindTimezoneId(TStringRef(&*start, size), timezoneId)) {
  2063. return false;
  2064. }
  2065. SetTimezoneId(result, timezoneId);
  2066. return true;
  2067. });
  2068. break;
  2069. case 'b': {
  2070. static constexpr size_t size = 3;
  2071. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  2072. const auto start = it;
  2073. size_t cnt = 0U;
  2074. while (limit > 0 && cnt < size && std::isalpha(*it)) {
  2075. ++it;
  2076. ++cnt;
  2077. --limit;
  2078. }
  2079. const std::string_view monthName{start, cnt};
  2080. ui8 month = 0U;
  2081. if (cnt < size || !ValidateMonthShortName(monthName, month)) {
  2082. return false;
  2083. }
  2084. SetMonth(result, month);
  2085. return true;
  2086. });
  2087. break;
  2088. }
  2089. case 'B': {
  2090. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  2091. const auto start = it;
  2092. size_t cnt = 0U;
  2093. while (limit > 0 && std::isalpha(*it)) {
  2094. ++it;
  2095. ++cnt;
  2096. --limit;
  2097. }
  2098. const std::string_view monthName{start, cnt};
  2099. ui8 month = 0U;
  2100. if (!ValidateMonthFullName(monthName, month)) {
  2101. return false;
  2102. }
  2103. SetMonth(result, month);
  2104. return true;
  2105. });
  2106. break;
  2107. }
  2108. default:
  2109. ythrow yexception() << "invalid format character: " << *ptr;
  2110. }
  2111. dataStart = ptr + 1U;
  2112. }
  2113. if (dataSize) {
  2114. Scanners_.emplace_back(TDataScanner{std::string_view(&*dataStart, dataSize)});
  2115. }
  2116. }
  2117. };
  2118. #define PARSE_SPECIFIC_FORMAT(format) \
  2119. SIMPLE_STRICT_UDF(TParse##format, TOptional<TResource<TMResourceName>>(TAutoMap<char*>)) { \
  2120. auto str = args[0].AsStringRef(); \
  2121. TInstant instant; \
  2122. if (!TInstant::TryParse##format(TStringBuf(str.Data(), str.Size()), instant) || instant.Seconds() >= NUdf::MAX_DATETIME) { \
  2123. return TUnboxedValuePod(); \
  2124. } \
  2125. auto& builder = valueBuilder->GetDateBuilder(); \
  2126. TUnboxedValuePod result(0); \
  2127. auto& storage = Reference(result); \
  2128. storage.FromTimestamp(builder, instant.MicroSeconds()); \
  2129. return result; \
  2130. }
  2131. PARSE_SPECIFIC_FORMAT(Rfc822);
  2132. PARSE_SPECIFIC_FORMAT(Iso8601);
  2133. PARSE_SPECIFIC_FORMAT(Http);
  2134. PARSE_SPECIFIC_FORMAT(X509);
  2135. SIMPLE_MODULE(TDateTime2Module,
  2136. TUserDataTypeFuncFactory<true, true, SplitName, TSplit,
  2137. TDate,
  2138. TDatetime,
  2139. TTimestamp,
  2140. TTzDate,
  2141. TTzDatetime,
  2142. TTzTimestamp,
  2143. TDate32,
  2144. TDatetime64,
  2145. TTimestamp64>,
  2146. TMakeDate,
  2147. TMakeDatetime,
  2148. TMakeTimestamp,
  2149. TMakeTzDate,
  2150. TMakeTzDatetime,
  2151. TMakeTzTimestamp,
  2152. TConvert,
  2153. TMakeDate32,
  2154. TMakeDatetime64,
  2155. TMakeTimestamp64,
  2156. TGetDateComponent<GetYearName, ui16, GetYear, i32, GetWYear>,
  2157. TGetDateComponent<GetDayOfYearName, ui16, GetDayOfYear, ui16, GetWDayOfYear>,
  2158. TGetDateComponent<GetMonthName, ui8, GetMonth, ui8, GetWMonth>,
  2159. TGetMonthName,
  2160. TGetDateComponent<GetWeekOfYearName, ui8, GetWeekOfYear, ui8, GetWWeekOfYear>,
  2161. TGetDateComponent<GetWeekOfYearIso8601Name, ui8, GetWeekOfYearIso8601, ui8, GetWWeekOfYearIso8601>,
  2162. TGetDateComponent<GetDayOfMonthName, ui8, GetDay, ui8, GetWDay>,
  2163. TGetDateComponent<GetDayOfWeekName, ui8, GetDayOfWeek, ui8, GetWDayOfWeek>,
  2164. TGetDayOfWeekName,
  2165. TGetTimeComponent<GetHourName, ui8, GetHour, 1u, 3600u, 24u, false>,
  2166. TGetTimeComponent<GetMinuteName, ui8, GetMinute, 1u, 60u, 60u, false>,
  2167. TGetTimeComponent<GetSecondName, ui8, GetSecond, 1u, 1u, 60u, false>,
  2168. TGetTimeComponent<GetMillisecondOfSecondName, ui32, GetMicrosecond, 1000u, 1000u, 1000u, true>,
  2169. TGetTimeComponent<GetMicrosecondOfSecondName, ui32, GetMicrosecond, 1u, 1u, 1000000u, true>,
  2170. TGetDateComponent<GetTimezoneIdName, ui16, GetTimezoneId, ui16, GetWTimezoneId>,
  2171. TGetTimezoneName,
  2172. TUpdate,
  2173. TFromSeconds,
  2174. TFromMilliseconds,
  2175. TFromMicroseconds,
  2176. TFromSeconds64,
  2177. TFromMilliseconds64,
  2178. TFromMicroseconds64,
  2179. TIntervalFromDays,
  2180. TIntervalFromHours,
  2181. TIntervalFromMinutes,
  2182. TIntervalFromSeconds,
  2183. TIntervalFromMilliseconds,
  2184. TIntervalFromMicroseconds,
  2185. TInterval64FromDays,
  2186. TInterval64FromHours,
  2187. TInterval64FromMinutes,
  2188. TInterval64FromSeconds,
  2189. TInterval64FromMilliseconds,
  2190. TInterval64FromMicroseconds,
  2191. TToDays,
  2192. TToHours,
  2193. TToMinutes,
  2194. TStartOfYear,
  2195. TStartOfQuarter,
  2196. TStartOfMonth,
  2197. TStartOfWeek,
  2198. TStartOfDay,
  2199. TStartOf,
  2200. TTimeOfDay,
  2201. TShiftYears,
  2202. TShiftQuarters,
  2203. TShiftMonths,
  2204. TEndOfYear,
  2205. TEndOfQuarter,
  2206. TEndOfMonth,
  2207. TEndOfWeek,
  2208. TEndOfDay,
  2209. TToUnits<ToSecondsName, ui32, 1>,
  2210. TToUnits<ToMillisecondsName, ui64, 1000>,
  2211. TToUnits<ToMicrosecondsName, ui64, 1000000>,
  2212. TFormat,
  2213. TParse,
  2214. TParseRfc822,
  2215. TParseIso8601,
  2216. TParseHttp,
  2217. TParseX509
  2218. )
  2219. }
// Registers TDateTime2Module through the REGISTER_MODULES macro.
REGISTER_MODULES(TDateTime2Module)