datetime_udf.cpp 119 KB


  1. #include <yql/essentials/minikql/mkql_type_ops.h>
  2. #include <yql/essentials/public/udf/tz/udf_tz.h>
  3. #include <yql/essentials/public/udf/udf_helpers.h>
  4. #include <yql/essentials/minikql/datetime/datetime.h>
  5. #include <yql/essentials/minikql/datetime/datetime64.h>
  6. #include <yql/essentials/public/udf/arrow/udf_arrow_helpers.h>
  7. #include <util/datetime/base.h>
  8. using namespace NKikimr;
  9. using namespace NUdf;
  10. using namespace NYql::DateTime;
  // String constants with external linkage. The templates in this file take
  // `const char*` non-type template parameters (function names, resource
  // names) and compare resource-name pointers at compile time.

  // Split.
  extern const char SplitUDF[] = "Split";

  // Conversions to a number of time units.
  extern const char ToSecondsUDF[] = "ToSeconds";
  extern const char ToMillisecondsUDF[] = "ToMilliseconds";
  extern const char ToMicrosecondsUDF[] = "ToMicroseconds";

  // Component getters.
  extern const char GetYearUDF[] = "GetYear";
  extern const char GetDayOfYearUDF[] = "GetDayOfYear";
  extern const char GetMonthUDF[] = "GetMonth";
  extern const char GetMonthNameUDF[] = "GetMonthName";
  extern const char GetWeekOfYearUDF[] = "GetWeekOfYear";
  extern const char GetWeekOfYearIso8601UDF[] = "GetWeekOfYearIso8601";
  extern const char GetDayOfMonthUDF[] = "GetDayOfMonth";
  extern const char GetDayOfWeekUDF[] = "GetDayOfWeek";
  extern const char GetDayOfWeekNameUDF[] = "GetDayOfWeekName";
  extern const char GetTimezoneIdUDF[] = "GetTimezoneId";
  extern const char GetTimezoneNameUDF[] = "GetTimezoneName";
  extern const char GetHourUDF[] = "GetHour";
  extern const char GetMinuteUDF[] = "GetMinute";
  extern const char GetSecondUDF[] = "GetSecond";
  extern const char GetMillisecondOfSecondUDF[] = "GetMillisecondOfSecond";
  extern const char GetMicrosecondOfSecondUDF[] = "GetMicrosecondOfSecond";

  // StartOf*.
  extern const char StartOfYearUDF[] = "StartOfYear";
  extern const char StartOfQuarterUDF[] = "StartOfQuarter";
  extern const char StartOfMonthUDF[] = "StartOfMonth";
  extern const char StartOfWeekUDF[] = "StartOfWeek";
  extern const char StartOfDayUDF[] = "StartOfDay";

  // EndOf*.
  extern const char EndOfYearUDF[] = "EndOfYear";
  extern const char EndOfQuarterUDF[] = "EndOfQuarter";
  extern const char EndOfMonthUDF[] = "EndOfMonth";
  extern const char EndOfWeekUDF[] = "EndOfWeek";
  extern const char EndOfDayUDF[] = "EndOfDay";

  // Shift*.
  extern const char ShiftYearsUDF[] = "ShiftYears";
  extern const char ShiftQuartersUDF[] = "ShiftQuarters";
  extern const char ShiftMonthsUDF[] = "ShiftMonths";

  // Resource tags: the narrow (TM) and the wide (TM64) broken-down time.
  extern const char TMResourceName[] = "DateTime2.TM";
  extern const char TM64ResourceName[] = "DateTime2.TM64";
  // Number of microseconds in common time spans. Each constant is built from
  // the previous one; the type is `long long`, as with the `ll` literals.
  constexpr long long UsecondsInMilliseconds = 1000LL;
  constexpr long long UsecondsInSecond = 1000LL * UsecondsInMilliseconds;
  constexpr long long UsecondsInMinute = 60LL * UsecondsInSecond;
  constexpr long long UsecondsInHour = 60LL * UsecondsInMinute;
  constexpr long long UsecondsInDay = 24LL * UsecondsInHour;
  51. template <const char* TFuncName, typename TResult, ui32 ScaleAfterSeconds>
  52. class TToUnits {
  53. public:
  54. typedef bool TTypeAwareMarker;
  55. using TSignedResult = typename std::make_signed<TResult>::type;
  56. static TResult DateCore(ui16 value) {
  57. return value * ui32(86400) * TResult(ScaleAfterSeconds);
  58. }
  59. template<typename TTzDate>
  60. static TResult TzBlockCore(TBlockItem tzDate);
  61. template<>
  62. static TResult TzBlockCore<TTzDate>(TBlockItem tzDate) {
  63. return DateCore(tzDate.Get<ui16>());
  64. }
  65. template<>
  66. static TResult TzBlockCore<TTzDatetime>(TBlockItem tzDate) {
  67. return DatetimeCore(tzDate.Get<ui32>());
  68. }
  69. template<>
  70. static TResult TzBlockCore<TTzTimestamp>(TBlockItem tzDate) {
  71. return TimestampCore(tzDate.Get<ui64>());
  72. }
  73. static TResult DatetimeCore(ui32 value) {
  74. return value * TResult(ScaleAfterSeconds);
  75. }
  76. static TResult TimestampCore(ui64 value) {
  77. return TResult(value / (1000000u / ScaleAfterSeconds));
  78. }
  79. static TSignedResult IntervalCore(i64 value) {
  80. return TSignedResult(value / (1000000u / ScaleAfterSeconds));
  81. }
  82. static const TStringRef& Name() {
  83. static auto name = TStringRef(TFuncName, std::strlen(TFuncName));
  84. return name;
  85. }
  86. template<typename TTzDate, typename TOutput>
  87. static auto MakeTzBlockExec() {
  88. using TReader = TTzDateBlockReader<TTzDate, /*Nullable*/ false>;
  89. return UnaryPreallocatedReaderExecImpl<TReader, TOutput, TzBlockCore<TTzDate>>;
  90. }
  91. static bool DeclareSignature(
  92. const TStringRef& name,
  93. TType* userType,
  94. IFunctionTypeInfoBuilder& builder,
  95. bool typesOnly)
  96. {
  97. if (Name() != name) {
  98. return false;
  99. }
  100. try {
  101. auto typeInfoHelper = builder.TypeInfoHelper();
  102. TTupleTypeInspector tuple(*typeInfoHelper, userType);
  103. Y_ENSURE(tuple);
  104. Y_ENSURE(tuple.GetElementsCount() > 0);
  105. TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0));
  106. Y_ENSURE(argsTuple);
  107. if (argsTuple.GetElementsCount() != 1) {
  108. builder.SetError("Expected one argument");
  109. return true;
  110. }
  111. auto argType = argsTuple.GetElementType(0);
  112. TVector<const TType*> argBlockTypes;
  113. argBlockTypes.push_back(argType);
  114. TBlockTypeInspector block(*typeInfoHelper, argType);
  115. if (block) {
  116. Y_ENSURE(!block.IsScalar());
  117. argType = block.GetItemType();
  118. }
  119. bool isOptional = false;
  120. if (auto opt = TOptionalTypeInspector(*typeInfoHelper, argType)) {
  121. argType = opt.GetItemType();
  122. isOptional = true;
  123. }
  124. TDataTypeInspector data(*typeInfoHelper, argType);
  125. if (!data) {
  126. builder.SetError("Expected data type");
  127. return true;
  128. }
  129. auto typeId = data.GetTypeId();
  130. if (!(typeId == TDataType<TDate>::Id || typeId == TDataType<TTzDate>::Id ||
  131. typeId == TDataType<TDatetime>::Id || typeId == TDataType<TTzDatetime>::Id ||
  132. typeId == TDataType<TTimestamp>::Id || typeId == TDataType<TTzTimestamp>::Id ||
  133. typeId == TDataType<TInterval>::Id)) {
  134. builder.SetError(TStringBuilder() << "Type " << GetDataTypeInfo(GetDataSlot(typeId)).Name << " is not supported");
  135. }
  136. builder.Args()->Add(argsTuple.GetElementType(0)).Done();
  137. const TType* retType;
  138. if (typeId != TDataType<TInterval>::Id) {
  139. retType = builder.SimpleType<TResult>();
  140. } else {
  141. retType = builder.SimpleType<TSignedResult>();
  142. }
  143. if (isOptional) {
  144. retType = builder.Optional()->Item(retType).Build();
  145. }
  146. auto outputType = retType;
  147. if (block) {
  148. retType = builder.Block(block.IsScalar())->Item(retType).Build();
  149. }
  150. builder.Returns(retType);
  151. builder.SupportsBlocks();
  152. builder.IsStrict();
  153. builder.UserType(userType);
  154. if (!typesOnly) {
  155. if (typeId == TDataType<TDate>::Id || typeId == TDataType<TTzDate>::Id) {
  156. if (block) {
  157. const auto exec = (typeId == TDataType<TTzDate>::Id)
  158. ? MakeTzBlockExec<TTzDate, TResult>()
  159. : UnaryPreallocatedExecImpl<ui16, TResult, DateCore>;
  160. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  161. exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  162. } else {
  163. builder.Implementation(new TUnaryOverOptionalImpl<ui16, TResult, DateCore>());
  164. }
  165. }
  166. if (typeId == TDataType<TDatetime>::Id || typeId == TDataType<TTzDatetime>::Id) {
  167. if (block) {
  168. const auto exec = (typeId == TDataType<TTzDatetime>::Id)
  169. ? MakeTzBlockExec<TTzDatetime, TResult>()
  170. : UnaryPreallocatedExecImpl<ui32, TResult, DatetimeCore>;
  171. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  172. exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  173. } else {
  174. builder.Implementation(new TUnaryOverOptionalImpl<ui32, TResult, DatetimeCore>());
  175. }
  176. }
  177. if (typeId == TDataType<TTimestamp>::Id || typeId == TDataType<TTzTimestamp>::Id) {
  178. if (block) {
  179. const auto exec = (typeId == TDataType<TTzTimestamp>::Id)
  180. ? MakeTzBlockExec<TTzTimestamp, TResult>()
  181. : UnaryPreallocatedExecImpl<ui64, TResult, TimestampCore>;
  182. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  183. exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  184. } else {
  185. builder.Implementation(new TUnaryOverOptionalImpl<ui64, TResult, TimestampCore>());
  186. }
  187. }
  188. if (typeId == TDataType<TInterval>::Id) {
  189. if (block) {
  190. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  191. UnaryPreallocatedExecImpl<i64, TSignedResult, IntervalCore>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  192. } else {
  193. builder.Implementation(new TUnaryOverOptionalImpl<i64, TSignedResult, IntervalCore>());
  194. }
  195. }
  196. }
  197. } catch (const std::exception& e) {
  198. builder.SetError(TStringBuf(e.what()));
  199. }
  200. return true;
  201. }
  202. };
  203. template <const char* TFuncName, typename TFieldStorage,
  204. TFieldStorage (*Accessor)(const TUnboxedValuePod&),
  205. TFieldStorage (*WAccessor)(const TUnboxedValuePod&),
  206. ui32 Divisor, ui32 Scale, ui32 Limit, bool Fractional>
  207. struct TGetTimeComponent {
  208. typedef bool TTypeAwareMarker;
  209. static const TStringRef& Name() {
  210. static auto name = TStringRef(TFuncName, std::strlen(TFuncName));
  211. return name;
  212. }
  213. static bool DeclareSignature(
  214. const TStringRef& name,
  215. TType* userType,
  216. IFunctionTypeInfoBuilder& builder,
  217. bool typesOnly)
  218. {
  219. if (Name() != name) {
  220. return false;
  221. }
  222. if (!userType) {
  223. builder.SetError("User type is missing");
  224. return true;
  225. }
  226. builder.UserType(userType);
  227. const auto typeInfoHelper = builder.TypeInfoHelper();
  228. TTupleTypeInspector tuple(*typeInfoHelper, userType);
  229. Y_ENSURE(tuple, "Tuple with args and options tuples expected");
  230. Y_ENSURE(tuple.GetElementsCount() > 0,
  231. "Tuple has to contain positional arguments");
  232. TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0));
  233. Y_ENSURE(argsTuple, "Tuple with args expected");
  234. if (argsTuple.GetElementsCount() != 1) {
  235. builder.SetError("Single argument expected");
  236. return true;
  237. }
  238. auto argType = argsTuple.GetElementType(0);
  239. TVector<const TType*> argBlockTypes;
  240. argBlockTypes.push_back(argType);
  241. TBlockTypeInspector block(*typeInfoHelper, argType);
  242. if (block) {
  243. Y_ENSURE(!block.IsScalar());
  244. argType = block.GetItemType();
  245. }
  246. bool isOptional = false;
  247. if (auto opt = TOptionalTypeInspector(*typeInfoHelper, argType)) {
  248. argType = opt.GetItemType();
  249. isOptional = true;
  250. }
  251. TResourceTypeInspector resource(*typeInfoHelper, argType);
  252. if (!resource) {
  253. TDataTypeInspector data(*typeInfoHelper, argType);
  254. if (!data) {
  255. builder.SetError("Data type expected");
  256. return true;
  257. }
  258. const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features;
  259. if (features & NUdf::BigDateType) {
  260. BuildSignature<TFieldStorage, TM64ResourceName, WAccessor>(builder, typesOnly);
  261. return true;
  262. }
  263. if (features & NUdf::TzDateType) {
  264. BuildSignature<TFieldStorage, TMResourceName, Accessor>(builder, typesOnly);
  265. return true;
  266. }
  267. if (features & NUdf::DateType) {
  268. builder.Args()->Add(argsTuple.GetElementType(0)).Done();
  269. const TType* retType = builder.SimpleType<TFieldStorage>();
  270. if (isOptional) {
  271. retType = builder.Optional()->Item(retType).Build();
  272. }
  273. auto outputType = retType;
  274. if (block) {
  275. retType = builder.Block(block.IsScalar())->Item(retType).Build();
  276. }
  277. builder.Returns(retType);
  278. builder.SupportsBlocks();
  279. builder.IsStrict();
  280. if (!typesOnly) {
  281. const auto typeId = data.GetTypeId();
  282. if (typeId == TDataType<TDate>::Id) {
  283. if (block) {
  284. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  285. UnaryPreallocatedExecImpl<ui16, TFieldStorage, Core<ui16, true, false>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  286. } else {
  287. builder.Implementation(new TUnaryOverOptionalImpl<ui16, TFieldStorage, Core<ui16, true, false>>());
  288. }
  289. }
  290. if (typeId == TDataType<TDatetime>::Id) {
  291. if (block) {
  292. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  293. UnaryPreallocatedExecImpl<ui32, TFieldStorage, Core<ui32, false, false>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  294. } else {
  295. builder.Implementation(new TUnaryOverOptionalImpl<ui32, TFieldStorage, Core<ui32, false, false>>());
  296. }
  297. }
  298. if (typeId == TDataType<TTimestamp>::Id) {
  299. if (block) {
  300. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  301. UnaryPreallocatedExecImpl<ui64, TFieldStorage, Core<ui64, false, true>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  302. } else {
  303. builder.Implementation(new TUnaryOverOptionalImpl<ui64, TFieldStorage, Core<ui64, false, true>>());
  304. }
  305. }
  306. }
  307. return true;
  308. }
  309. ::TStringBuilder sb;
  310. sb << "Invalid argument type: got ";
  311. TTypePrinter(*typeInfoHelper, argType).Out(sb.Out);
  312. sb << ", but Resource<" << TMResourceName <<"> or Resource<"
  313. << TM64ResourceName << "> expected";
  314. builder.SetError(sb);
  315. return true;
  316. }
  317. Y_ENSURE(!block);
  318. if (resource.GetTag() == TStringRef::Of(TM64ResourceName)) {
  319. BuildSignature<TFieldStorage, TM64ResourceName, WAccessor>(builder, typesOnly);
  320. return true;
  321. }
  322. if (resource.GetTag() == TStringRef::Of(TMResourceName)) {
  323. BuildSignature<TFieldStorage, TMResourceName, Accessor>(builder, typesOnly);
  324. return true;
  325. }
  326. builder.SetError("Unexpected Resource tag");
  327. return true;
  328. }
  329. private:
  330. template <typename TInput, bool AlwaysZero, bool InputFractional>
  331. static TFieldStorage Core(TInput val) {
  332. if constexpr (AlwaysZero) {
  333. return 0;
  334. }
  335. if constexpr (InputFractional) {
  336. if constexpr (Fractional) {
  337. return (val / Scale) % Limit;
  338. } else {
  339. return (val / 1000000u / Scale) % Limit;
  340. }
  341. } else {
  342. if constexpr (Fractional) {
  343. return 0;
  344. } else {
  345. return (val / Scale) % Limit;
  346. }
  347. }
  348. }
  349. template<typename TResult, TResult (*Func)(const TUnboxedValuePod&)>
  350. class TImpl : public TBoxedValue {
  351. public:
  352. TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final {
  353. Y_UNUSED(valueBuilder);
  354. EMPTY_RESULT_ON_EMPTY_ARG(0);
  355. return TUnboxedValuePod((TResult(Func(args[0])) / Divisor));
  356. }
  357. };
  358. template<typename TResult, const char* TResourceName, TResult (*Func)(const TUnboxedValuePod&)>
  359. static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) {
  360. builder.Returns<TResult>();
  361. builder.Args()->Add<TAutoMap<TResource<TResourceName>>>();
  362. builder.IsStrict();
  363. if (!typesOnly) {
  364. builder.Implementation(new TImpl<TResult, Func>());
  365. }
  366. }
  367. };
  368. namespace {
  369. // FIXME: The default value for TResourceName allows to omit
  370. // explicit specialization in functions that still doesn't support
  371. // big datetime types. Should be removed in future.
  372. template<const char* TResourceName = TMResourceName, typename TValue,
  373. typename TStorage = std::conditional_t<TResourceName == TMResourceName,
  374. TTMStorage, TTM64Storage>>
  375. const TStorage& Reference(const TValue& value) {
  376. return *reinterpret_cast<const TStorage*>(value.GetRawPtr());
  377. }
  378. // FIXME: The default value for TResourceName allows to omit
  379. // explicit specialization in functions that still doesn't support
  380. // big datetime types. Should be removed in future.
  381. template<const char* TResourceName = TMResourceName, typename TValue,
  382. typename TStorage = std::conditional_t<TResourceName == TMResourceName,
  383. TTMStorage, TTM64Storage>>
  384. TStorage& Reference(TValue& value) {
  385. return *reinterpret_cast<TStorage*>(value.GetRawPtr());
  386. }
  387. template<const char* TResourceName>
  388. TUnboxedValuePod DoAddMonths(const TUnboxedValuePod& date, i64 months, const NUdf::IDateBuilder& builder) {
  389. auto result = date;
  390. auto& storage = Reference<TResourceName>(result);
  391. if (!NYql::DateTime::DoAddMonths(storage, months, builder)) {
  392. return TUnboxedValuePod{};
  393. }
  394. return result;
  395. }
  396. template<const char* TResourceName>
  397. TUnboxedValuePod DoAddQuarters(const TUnboxedValuePod& date, i64 quarters, const NUdf::IDateBuilder& builder) {
  398. return DoAddMonths<TResourceName>(date, quarters * 3ll, builder);
  399. }
  400. template<const char* TResourceName>
  401. TUnboxedValuePod DoAddYears(const TUnboxedValuePod& date, i64 years, const NUdf::IDateBuilder& builder) {
  402. auto result = date;
  403. auto& storage = Reference<TResourceName>(result);
  404. if (!NYql::DateTime::DoAddYears(storage, years, builder)) {
  405. return TUnboxedValuePod{};
  406. }
  407. return result;
  408. }
  409. // FIXME: The default value for TResourceName allows to omit
  410. // explicit specialization in functions that still doesn't support
  411. // big datetime types. Should be removed in future.
  412. #define ACCESSORS_POLY(field, type, wtype) \
  413. template<const char* TResourceName = TMResourceName, typename TValue, \
  414. typename rtype = std::conditional_t<TResourceName == TMResourceName, \
  415. type, wtype>> \
  416. inline rtype Get##field(const TValue& tm) { \
  417. return (rtype)Reference<TResourceName>(tm).field; \
  418. } \
  419. template<const char* TResourceName = TMResourceName, typename TValue, \
  420. typename itype = std::conditional_t<TResourceName == TMResourceName, \
  421. type, wtype>> \
  422. inline void Set##field(TValue& tm, itype value) { \
  423. Reference<TResourceName>(tm).field = value; \
  424. } \
  425. #define ACCESSORS(field, type) \
  426. ACCESSORS_POLY(field, type, type)
  427. ACCESSORS_POLY(Year, ui16, i32)
  428. ACCESSORS(DayOfYear, ui16)
  429. ACCESSORS(WeekOfYear, ui8)
  430. ACCESSORS(WeekOfYearIso8601, ui8)
  431. ACCESSORS(DayOfWeek, ui8)
  432. ACCESSORS(Month, ui8)
  433. ACCESSORS(Day, ui8)
  434. ACCESSORS(Hour, ui8)
  435. ACCESSORS(Minute, ui8)
  436. ACCESSORS(Second, ui8)
  437. ACCESSORS(Microsecond, ui32)
  438. ACCESSORS(TimezoneId, ui16)
  439. #undef ACCESSORS
  440. #undef ACCESSORS_POLY
  441. // FIXME: The default value for TResourceName allows to omit
  442. // explicit specialization in functions that still doesn't support
  443. // big datetime types. Should be removed in future.
  444. template<const char* TResourceName = TMResourceName>
  445. inline bool ValidateYear(std::conditional_t<TResourceName == TMResourceName, ui16, i32> year) {
  446. if constexpr (TResourceName == TMResourceName) {
  447. return year >= NUdf::MIN_YEAR || year < NUdf::MAX_YEAR;
  448. } else {
  449. return year >= NUdf::MIN_YEAR32 || year < NUdf::MAX_YEAR32;
  450. }
  451. }
  452. inline bool ValidateMonth(ui8 month) {
  453. return month >= 1 && month <= 12;
  454. }
  455. inline bool ValidateDay(ui8 day) {
  456. return day >= 1 && day <= 31;
  457. }
  458. inline bool ValidateHour(ui8 hour) {
  459. return hour < 24;
  460. }
  461. inline bool ValidateMinute(ui8 minute) {
  462. return minute < 60;
  463. }
  464. inline bool ValidateSecond(ui8 second) {
  465. return second < 60;
  466. }
  467. inline bool ValidateMicrosecond(ui32 microsecond) {
  468. return microsecond < 1000000;
  469. }
  470. inline bool ValidateTimezoneId(ui16 timezoneId) {
  471. const auto& zones = NUdf::GetTimezones();
  472. return timezoneId < zones.size() && !zones[timezoneId].empty();
  473. }
  474. inline bool ValidateMonthShortName(const std::string_view& monthName, ui8& month) {
  475. static constexpr auto cmp = [](const std::string_view& a, const std::string_view& b) {
  476. int cmp = strnicmp(a.data(), b.data(), std::min(a.size(), b.size()));
  477. if (cmp == 0)
  478. return a.size() < b.size();
  479. return cmp < 0;
  480. };
  481. static const std::map<std::string_view, ui8, decltype(cmp)> mp = {
  482. {"jan", 1},
  483. {"feb", 2},
  484. {"mar", 3},
  485. {"apr", 4},
  486. {"may", 5},
  487. {"jun", 6},
  488. {"jul", 7},
  489. {"aug", 8},
  490. {"sep", 9},
  491. {"oct", 10},
  492. {"nov", 11},
  493. {"dec", 12}
  494. };
  495. const auto& it = mp.find(monthName);
  496. if (it != mp.end()) {
  497. month = it -> second;
  498. return true;
  499. }
  500. return false;
  501. }
  502. inline bool ValidateMonthFullName(const std::string_view& monthName, ui8& month) {
  503. static constexpr auto cmp = [](const std::string_view& a, const std::string_view& b) {
  504. int cmp = strnicmp(a.data(), b.data(), std::min(a.size(), b.size()));
  505. if (cmp == 0)
  506. return a.size() < b.size();
  507. return cmp < 0;
  508. };
  509. static const std::map<std::string_view, ui8, decltype(cmp)> mp = {
  510. {"january", 1},
  511. {"february", 2},
  512. {"march", 3},
  513. {"april", 4},
  514. {"may", 5},
  515. {"june", 6},
  516. {"july", 7},
  517. {"august", 8},
  518. {"september", 9},
  519. {"october", 10},
  520. {"november", 11},
  521. {"december", 12}
  522. };
  523. const auto& it = mp.find(monthName);
  524. if (it != mp.end()) {
  525. month = it -> second;
  526. return true;
  527. }
  528. return false;
  529. }
  530. template<typename TType>
  531. inline bool Validate(typename TDataType<TType>::TLayout arg);
  532. template<>
  533. inline bool Validate<TTimestamp>(ui64 timestamp) {
  534. return timestamp < MAX_TIMESTAMP;
  535. }
  536. template<>
  537. inline bool Validate<TTimestamp64>(i64 timestamp) {
  538. return timestamp >= MIN_TIMESTAMP64 && timestamp <= MAX_TIMESTAMP64;
  539. }
  540. template<>
  541. inline bool Validate<TInterval>(i64 interval) {
  542. return interval > -i64(MAX_TIMESTAMP) && interval < i64(MAX_TIMESTAMP);
  543. }
  544. template<>
  545. inline bool Validate<TInterval64>(i64 interval) {
  546. return interval >= -MAX_INTERVAL64 && interval <= MAX_INTERVAL64;
  547. }
  548. // Split
  549. template<typename TUserDataType, bool Nullable>
  550. using TSplitArgReader = std::conditional_t<TTzDataType<TUserDataType>::Result,
  551. TTzDateBlockReader<TUserDataType, Nullable>,
  552. TFixedSizeBlockReader<typename TDataType<TUserDataType>::TLayout, Nullable>>;
  553. template<typename TUserDataType>
  554. struct TSplitKernelExec : TUnaryKernelExec<TSplitKernelExec<TUserDataType>, TSplitArgReader<TUserDataType, false>, TResourceArrayBuilder<false>> {
  555. static void Split(TBlockItem arg, TTMStorage& storage, const IValueBuilder& valueBuilder);
  556. template<typename TSink>
  557. static void Process(const IValueBuilder* valueBuilder, TBlockItem arg, const TSink& sink) {
  558. try {
  559. TBlockItem res {0};
  560. Split(arg, Reference(res), *valueBuilder);
  561. sink(res);
  562. } catch (const std::exception& e) {
  563. UdfTerminate((TStringBuilder() << e.what()).data());
  564. }
  565. }
  566. };
  567. template <typename TUserDataType>
  568. class TSplit : public TBoxedValue {
  569. const TSourcePosition Pos_;
  570. public:
  571. explicit TSplit(TSourcePosition pos)
  572. : Pos_(pos)
  573. {}
  574. TUnboxedValue Run(
  575. const IValueBuilder* valueBuilder,
  576. const TUnboxedValuePod* args) const override;
  577. static bool DeclareSignature(
  578. TStringRef name,
  579. TType* userType,
  580. IFunctionTypeInfoBuilder& builder,
  581. bool typesOnly)
  582. {
  583. const auto typeInfoHelper = builder.TypeInfoHelper();
  584. TTupleTypeInspector tuple(*typeInfoHelper, userType);
  585. Y_ENSURE(tuple);
  586. Y_ENSURE(tuple.GetElementsCount() > 0);
  587. TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0));
  588. Y_ENSURE(argsTuple);
  589. if (argsTuple.GetElementsCount() != 1) {
  590. builder.SetError("Expected one argument");
  591. return true;
  592. }
  593. auto argType = argsTuple.GetElementType(0);
  594. builder.UserType(userType);
  595. builder.SupportsBlocks();
  596. builder.IsStrict();
  597. TBlockTypeInspector block(*typeInfoHelper, argType);
  598. if (block) {
  599. const auto* blockArgType = builder.Block(false)->Item<TUserDataType>().Build();
  600. builder.Args()->Add(blockArgType).Flags(ICallablePayload::TArgumentFlags::AutoMap);
  601. const auto* retType = builder.Resource(TMResourceName);
  602. const auto* blockRetType = builder.Block(false)->Item(retType).Build();
  603. builder.Returns(blockRetType);
  604. if (!typesOnly) {
  605. builder.Implementation(new TSimpleArrowUdfImpl({blockArgType}, retType, block.IsScalar(),
  606. TSplitKernelExec<TUserDataType>::Do, builder, TString(name), arrow::compute::NullHandling::COMPUTED_NO_PREALLOCATE));
  607. }
  608. } else {
  609. builder.Args()->Add<TUserDataType>().Flags(ICallablePayload::TArgumentFlags::AutoMap);
  610. if constexpr (NUdf::TDataType<TUserDataType>::Features & NYql::NUdf::BigDateType) {
  611. builder.Returns(builder.Resource(TM64ResourceName));
  612. } else {
  613. builder.Returns(builder.Resource(TMResourceName));
  614. }
  615. if (!typesOnly) {
  616. builder.Implementation(new TSplit<TUserDataType>(builder.GetSourcePosition()));
  617. }
  618. }
  619. return true;
  620. }
  621. };
  622. template <>
  623. void TSplitKernelExec<TDate>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
  624. storage.FromDate(builder.GetDateBuilder(), arg.Get<ui16>());
  625. }
  626. template <>
  627. void TSplitKernelExec<TDatetime>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
  628. storage.FromDatetime(builder.GetDateBuilder(), arg.Get<ui32>());
  629. }
  630. template <>
  631. void TSplitKernelExec<TTimestamp>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
  632. storage.FromTimestamp(builder.GetDateBuilder(), arg.Get<ui64>());
  633. }
  634. template <>
  635. void TSplitKernelExec<TTzDate>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
  636. storage.FromDate(builder.GetDateBuilder(), arg.Get<ui16>(), arg.GetTimezoneId());
  637. }
  638. template <>
  639. void TSplitKernelExec<TTzDatetime>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
  640. storage.FromDatetime(builder.GetDateBuilder(), arg.Get<ui32>(), arg.GetTimezoneId());
  641. }
  642. template <>
  643. void TSplitKernelExec<TTzTimestamp>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
  644. storage.FromTimestamp(builder.GetDateBuilder(), arg.Get<ui64>(), arg.GetTimezoneId());
  645. }
  646. template <>
  647. void TSplitKernelExec<TDate32>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) {
  648. ythrow yexception() << "Not implemented";
  649. }
  650. template <>
  651. void TSplitKernelExec<TDatetime64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) {
  652. ythrow yexception() << "Not implemented";
  653. }
  654. template <>
  655. void TSplitKernelExec<TTimestamp64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) {
  656. ythrow yexception() << "Not implemented";
  657. }
  658. template <>
  659. TUnboxedValue TSplit<TDate>::Run(
  660. const IValueBuilder* valueBuilder,
  661. const TUnboxedValuePod* args) const
  662. {
  663. try {
  664. EMPTY_RESULT_ON_EMPTY_ARG(0);
  665. auto& builder = valueBuilder->GetDateBuilder();
  666. TUnboxedValuePod result(0);
  667. auto& storage = Reference(result);
  668. storage.FromDate(builder, args[0].Get<ui16>());
  669. return result;
  670. } catch (const std::exception& e) {
  671. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  672. }
  673. }
  674. template <>
  675. TUnboxedValue TSplit<TDate32>::Run(
  676. const IValueBuilder* valueBuilder,
  677. const TUnboxedValuePod* args) const
  678. {
  679. try {
  680. EMPTY_RESULT_ON_EMPTY_ARG(0);
  681. TUnboxedValuePod result(0);
  682. auto& storage = Reference<TM64ResourceName>(result);
  683. storage.FromDate32(valueBuilder->GetDateBuilder(), args[0].Get<i32>());
  684. return result;
  685. } catch (const std::exception& e) {
  686. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  687. }
  688. }
  689. template <>
  690. TUnboxedValue TSplit<TDatetime>::Run(
  691. const IValueBuilder* valueBuilder,
  692. const TUnboxedValuePod* args) const
  693. {
  694. try {
  695. EMPTY_RESULT_ON_EMPTY_ARG(0);
  696. auto& builder = valueBuilder->GetDateBuilder();
  697. TUnboxedValuePod result(0);
  698. auto& storage = Reference(result);
  699. storage.FromDatetime(builder, args[0].Get<ui32>());
  700. return result;
  701. } catch (const std::exception& e) {
  702. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  703. }
  704. }
  705. template <>
  706. TUnboxedValue TSplit<TDatetime64>::Run(
  707. const IValueBuilder* valueBuilder,
  708. const TUnboxedValuePod* args) const
  709. {
  710. try {
  711. EMPTY_RESULT_ON_EMPTY_ARG(0);
  712. TUnboxedValuePod result(0);
  713. auto& storage = Reference<TM64ResourceName>(result);
  714. storage.FromDatetime64(valueBuilder->GetDateBuilder(), args[0].Get<i64>());
  715. return result;
  716. } catch (const std::exception& e) {
  717. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  718. }
  719. }
  720. template <>
  721. TUnboxedValue TSplit<TTimestamp>::Run(
  722. const IValueBuilder* valueBuilder,
  723. const TUnboxedValuePod* args) const
  724. {
  725. try {
  726. EMPTY_RESULT_ON_EMPTY_ARG(0);
  727. auto& builder = valueBuilder->GetDateBuilder();
  728. TUnboxedValuePod result(0);
  729. auto& storage = Reference(result);
  730. storage.FromTimestamp(builder, args[0].Get<ui64>());
  731. return result;
  732. } catch (const std::exception& e) {
  733. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  734. }
  735. }
  736. template <>
  737. TUnboxedValue TSplit<TTimestamp64>::Run(
  738. const IValueBuilder* valueBuilder,
  739. const TUnboxedValuePod* args) const
  740. {
  741. try {
  742. EMPTY_RESULT_ON_EMPTY_ARG(0);
  743. TUnboxedValuePod result(0);
  744. auto& storage = Reference<TM64ResourceName>(result);
  745. storage.FromTimestamp64(valueBuilder->GetDateBuilder(), args[0].Get<i64>());
  746. return result;
  747. } catch (const std::exception& e) {
  748. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  749. }
  750. }
  751. template <>
  752. TUnboxedValue TSplit<TTzDate>::Run(
  753. const IValueBuilder* valueBuilder,
  754. const TUnboxedValuePod* args) const
  755. {
  756. try {
  757. EMPTY_RESULT_ON_EMPTY_ARG(0);
  758. auto& builder = valueBuilder->GetDateBuilder();
  759. TUnboxedValuePod result(0);
  760. auto& storage = Reference(result);
  761. storage.FromDate(builder, args[0].Get<ui16>(), args[0].GetTimezoneId());
  762. return result;
  763. } catch (const std::exception& e) {
  764. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  765. }
  766. }
  767. template <>
  768. TUnboxedValue TSplit<TTzDatetime>::Run(
  769. const IValueBuilder* valueBuilder,
  770. const TUnboxedValuePod* args) const
  771. {
  772. try {
  773. EMPTY_RESULT_ON_EMPTY_ARG(0);
  774. auto& builder = valueBuilder->GetDateBuilder();
  775. TUnboxedValuePod result(0);
  776. auto& storage = Reference(result);
  777. storage.FromDatetime(builder, args[0].Get<ui32>(), args[0].GetTimezoneId());
  778. return result;
  779. } catch (const std::exception& e) {
  780. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  781. }
  782. }
  783. template <>
  784. TUnboxedValue TSplit<TTzTimestamp>::Run(
  785. const IValueBuilder* valueBuilder,
  786. const TUnboxedValuePod* args) const
  787. {
  788. try {
  789. EMPTY_RESULT_ON_EMPTY_ARG(0);
  790. auto& builder = valueBuilder->GetDateBuilder();
  791. TUnboxedValuePod result(0);
  792. auto& storage = Reference(result);
  793. storage.FromTimestamp(builder, args[0].Get<ui64>(), args[0].GetTimezoneId());
  794. return result;
  795. } catch (const std::exception& e) {
  796. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  797. }
  798. }
  799. // Make*
  800. template<typename TUserDataType, bool Nullable>
  801. using TMakeResBuilder = std::conditional_t<TTzDataType<TUserDataType>::Result,
  802. TTzDateArrayBuilder<TUserDataType, Nullable>,
  803. TFixedSizeArrayBuilder<typename TDataType<TUserDataType>::TLayout, Nullable>>;
  804. template<typename TUserDataType>
  805. struct TMakeDateKernelExec : TUnaryKernelExec<TMakeDateKernelExec<TUserDataType>, TReaderTraits::TResource<false>, TMakeResBuilder<TUserDataType, false>> {
  806. static TBlockItem Make(TTMStorage& storage, const IValueBuilder& valueBuilder);
  807. template<typename TSink>
  808. static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
  809. auto& storage = Reference(item);
  810. sink(TBlockItem(Make(storage, *valueBuilder)));
  811. }
  812. };
  813. template<> TBlockItem TMakeDateKernelExec<TDate>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
  814. TBlockItem res(storage.ToDate(valueBuilder.GetDateBuilder(), /*local*/ false));
  815. return res;
  816. }
  817. template<> TBlockItem TMakeDateKernelExec<TDatetime>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
  818. TBlockItem res(storage.ToDatetime(valueBuilder.GetDateBuilder()));
  819. return res;
  820. }
  821. template<> TBlockItem TMakeDateKernelExec<TTimestamp>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
  822. TBlockItem res(storage.ToTimestamp(valueBuilder.GetDateBuilder()));
  823. return res;
  824. }
  825. template<> TBlockItem TMakeDateKernelExec<TTzDate>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
  826. TBlockItem res(storage.ToDate(valueBuilder.GetDateBuilder(), /*local*/ true));
  827. res.SetTimezoneId(storage.TimezoneId);
  828. return res;
  829. }
  830. template<> TBlockItem TMakeDateKernelExec<TTzDatetime>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
  831. TBlockItem res(storage.ToDatetime(valueBuilder.GetDateBuilder()));
  832. res.SetTimezoneId(storage.TimezoneId);
  833. return res;
  834. }
  835. template<> TBlockItem TMakeDateKernelExec<TTzTimestamp>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
  836. TBlockItem res(storage.ToTimestamp(valueBuilder.GetDateBuilder()));
  837. res.SetTimezoneId(storage.TimezoneId);
  838. return res;
  839. }
  840. BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeDate, TDate(TAutoMap<TResource<TMResourceName>>)) {
  841. auto& builder = valueBuilder->GetDateBuilder();
  842. auto& storage = Reference(args[0]);
  843. return TUnboxedValuePod(storage.ToDate(builder, false));
  844. }
  845. END_SIMPLE_ARROW_UDF(TMakeDate, TMakeDateKernelExec<TDate>::Do);
  846. BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeDatetime, TDatetime(TAutoMap<TResource<TMResourceName>>)) {
  847. auto& builder = valueBuilder->GetDateBuilder();
  848. auto& storage = Reference(args[0]);
  849. return TUnboxedValuePod(storage.ToDatetime(builder));
  850. }
  851. END_SIMPLE_ARROW_UDF(TMakeDatetime, TMakeDateKernelExec<TDatetime>::Do);
  852. BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTimestamp, TTimestamp(TAutoMap<TResource<TMResourceName>>)) {
  853. auto& builder = valueBuilder->GetDateBuilder();
  854. auto& storage = Reference(args[0]);
  855. return TUnboxedValuePod(storage.ToTimestamp(builder));
  856. }
  857. END_SIMPLE_ARROW_UDF(TMakeTimestamp, TMakeDateKernelExec<TTimestamp>::Do);
  858. BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzDate, TTzDate(TAutoMap<TResource<TMResourceName>>)) {
  859. auto& builder = valueBuilder->GetDateBuilder();
  860. auto& storage = Reference(args[0]);
  861. try {
  862. TUnboxedValuePod result(storage.ToDate(builder, true));
  863. result.SetTimezoneId(storage.TimezoneId);
  864. return result;
  865. } catch (const std::exception& e) {
  866. UdfTerminate((TStringBuilder() << Pos_ << "Timestamp "
  867. << storage.ToString()
  868. << " cannot be casted to TzDate"
  869. ).data());
  870. }
  871. }
  872. END_SIMPLE_ARROW_UDF(TMakeTzDate, TMakeDateKernelExec<TTzDate>::Do);
  873. BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzDatetime, TTzDatetime(TAutoMap<TResource<TMResourceName>>)) {
  874. auto& builder = valueBuilder->GetDateBuilder();
  875. auto& storage = Reference(args[0]);
  876. TUnboxedValuePod result(storage.ToDatetime(builder));
  877. result.SetTimezoneId(storage.TimezoneId);
  878. return result;
  879. }
  880. END_SIMPLE_ARROW_UDF(TMakeTzDatetime, TMakeDateKernelExec<TTzDatetime>::Do);
  881. BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzTimestamp, TTzTimestamp(TAutoMap<TResource<TMResourceName>>)) {
  882. auto& builder = valueBuilder->GetDateBuilder();
  883. auto& storage = Reference(args[0]);
  884. TUnboxedValuePod result(storage.ToTimestamp(builder));
  885. result.SetTimezoneId(storage.TimezoneId);
  886. return result;
  887. }
  888. END_SIMPLE_ARROW_UDF(TMakeTzTimestamp, TMakeDateKernelExec<TTzTimestamp>::Do);
  889. SIMPLE_STRICT_UDF(TConvert, TResource<TM64ResourceName>(TAutoMap<TResource<TMResourceName>>)) {
  890. Y_UNUSED(valueBuilder);
  891. TUnboxedValuePod result(0);
  892. auto& arg = Reference(args[0]);
  893. auto& storage = Reference<TM64ResourceName>(result);
  894. storage.From(arg);
  895. return result;
  896. }
  897. SIMPLE_STRICT_UDF(TMakeDate32, TDate32(TAutoMap<TResource<TM64ResourceName>>)) {
  898. auto& storage = Reference<TM64ResourceName>(args[0]);
  899. return TUnboxedValuePod(storage.ToDate32(valueBuilder->GetDateBuilder()));
  900. }
  901. SIMPLE_STRICT_UDF(TMakeDatetime64, TDatetime64(TAutoMap<TResource<TM64ResourceName>>)) {
  902. auto& storage = Reference<TM64ResourceName>(args[0]);
  903. return TUnboxedValuePod(storage.ToDatetime64(valueBuilder->GetDateBuilder()));
  904. }
  905. SIMPLE_STRICT_UDF(TMakeTimestamp64, TTimestamp64(TAutoMap<TResource<TM64ResourceName>>)) {
  906. auto& storage = Reference<TM64ResourceName>(args[0]);
  907. return TUnboxedValuePod(storage.ToTimestamp64(valueBuilder->GetDateBuilder()));
  908. }
  909. // Get*
  910. // #define GET_METHOD(field, type) \
  911. // struct TGet##field##KernelExec : TUnaryKernelExec<TGet##field##KernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<type, false>> { \
  912. // template<typename TSink> \
  913. // static void Process(TBlockItem item, const IValueBuilder& valueBuilder, const TSink& sink) { \
  914. // Y_UNUSED(valueBuilder); \
  915. // sink(TBlockItem(Get##field(item))); \
  916. // } \
  917. // }; \
  918. // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGet##field, type(TAutoMap<TResource<TMResourceName>>)) { \
  919. // Y_UNUSED(valueBuilder); \
  920. // return TUnboxedValuePod(Get##field(args[0])); \
  921. // } \
  922. // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGet##field, TGet##field##KernelExec::Do, arrow::compute::NullHandling::INTERSECTION);
  923. template<const char* TUdfName,
  924. typename TResultType, TResultType (*Accessor)(const TUnboxedValuePod&),
  925. typename TResultWType, TResultWType (*WAccessor)(const TUnboxedValuePod&)>
  926. class TGetDateComponent: public ::NYql::NUdf::TBoxedValue {
  927. public:
  928. typedef bool TTypeAwareMarker;
  929. static const ::NYql::NUdf::TStringRef& Name() {
  930. static auto name = TStringRef(TUdfName, std::strlen(TUdfName));
  931. return name;
  932. }
  933. static bool DeclareSignature(
  934. const ::NYql::NUdf::TStringRef& name,
  935. ::NYql::NUdf::TType* userType,
  936. ::NYql::NUdf::IFunctionTypeInfoBuilder& builder,
  937. bool typesOnly)
  938. {
  939. if (Name() != name) {
  940. return false;
  941. }
  942. if (!userType) {
  943. builder.SetError("User type is missing");
  944. return true;
  945. }
  946. builder.UserType(userType);
  947. const auto typeInfoHelper = builder.TypeInfoHelper();
  948. TTupleTypeInspector tuple(*typeInfoHelper, userType);
  949. Y_ENSURE(tuple, "Tuple with args and options tuples expected");
  950. Y_ENSURE(tuple.GetElementsCount() > 0,
  951. "Tuple has to contain positional arguments");
  952. TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0));
  953. Y_ENSURE(argsTuple, "Tuple with args expected");
  954. if (argsTuple.GetElementsCount() != 1) {
  955. builder.SetError("Single argument expected");
  956. return true;
  957. }
  958. auto argType = argsTuple.GetElementType(0);
  959. if (const auto optType = TOptionalTypeInspector(*typeInfoHelper, argType)) {
  960. argType = optType.GetItemType();
  961. }
  962. TResourceTypeInspector resource(*typeInfoHelper, argType);
  963. if (!resource) {
  964. TDataTypeInspector data(*typeInfoHelper, argType);
  965. if (!data) {
  966. builder.SetError("Data type expected");
  967. return true;
  968. }
  969. const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features;
  970. if (features & NUdf::BigDateType) {
  971. BuildSignature<TResultWType, TM64ResourceName, WAccessor>(builder, typesOnly);
  972. return true;
  973. }
  974. if (features & (NUdf::DateType | NUdf::TzDateType)) {
  975. BuildSignature<TResultType, TMResourceName, Accessor>(builder, typesOnly);
  976. return true;
  977. }
  978. ::TStringBuilder sb;
  979. sb << "Invalid argument type: got ";
  980. TTypePrinter(*typeInfoHelper, argType).Out(sb.Out);
  981. sb << ", but Resource<" << TMResourceName <<"> or Resource<"
  982. << TM64ResourceName << "> expected";
  983. builder.SetError(sb);
  984. return true;
  985. }
  986. if (resource.GetTag() == TStringRef::Of(TM64ResourceName)) {
  987. BuildSignature<TResultWType, TM64ResourceName, WAccessor>(builder, typesOnly);
  988. return true;
  989. }
  990. if (resource.GetTag() == TStringRef::Of(TMResourceName)) {
  991. BuildSignature<TResultType, TMResourceName, Accessor>(builder, typesOnly);
  992. return true;
  993. }
  994. builder.SetError("Unexpected Resource tag");
  995. return true;
  996. }
  997. private:
  998. template<typename TResult, TResult (*Func)(const TUnboxedValuePod&)>
  999. class TImpl : public TBoxedValue {
  1000. public:
  1001. TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final {
  1002. Y_UNUSED(valueBuilder);
  1003. EMPTY_RESULT_ON_EMPTY_ARG(0);
  1004. return TUnboxedValuePod(TResult(Func(args[0])));
  1005. }
  1006. };
  1007. template<typename TResult, const char* TResourceName, TResult (*Func)(const TUnboxedValuePod&)>
  1008. static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) {
  1009. builder.Returns<TResult>();
  1010. builder.Args()->Add<TAutoMap<TResource<TResourceName>>>();
  1011. builder.IsStrict();
  1012. if (!typesOnly) {
  1013. builder.Implementation(new TImpl<TResult, Func>());
  1014. }
  1015. }
  1016. };
  1017. // TODO: Merge this with <TGetDateComponent> class.
  1018. template<const char* TUdfName, auto Accessor, auto WAccessor>
  1019. class TGetDateComponentName: public ::NYql::NUdf::TBoxedValue {
  1020. public:
  1021. typedef bool TTypeAwareMarker;
  1022. static const ::NYql::NUdf::TStringRef& Name() {
  1023. static auto name = TStringRef(TUdfName, std::strlen(TUdfName));
  1024. return name;
  1025. }
  1026. static bool DeclareSignature(
  1027. const ::NYql::NUdf::TStringRef& name,
  1028. ::NYql::NUdf::TType* userType,
  1029. ::NYql::NUdf::IFunctionTypeInfoBuilder& builder,
  1030. bool typesOnly)
  1031. {
  1032. if (Name() != name) {
  1033. return false;
  1034. }
  1035. if (!userType) {
  1036. builder.SetError("User type is missing");
  1037. return true;
  1038. }
  1039. builder.UserType(userType);
  1040. const auto typeInfoHelper = builder.TypeInfoHelper();
  1041. TTupleTypeInspector tuple(*typeInfoHelper, userType);
  1042. Y_ENSURE(tuple, "Tuple with args and options tuples expected");
  1043. Y_ENSURE(tuple.GetElementsCount() > 0,
  1044. "Tuple has to contain positional arguments");
  1045. TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0));
  1046. Y_ENSURE(argsTuple, "Tuple with args expected");
  1047. if (argsTuple.GetElementsCount() != 1) {
  1048. builder.SetError("Single argument expected");
  1049. return true;
  1050. }
  1051. auto argType = argsTuple.GetElementType(0);
  1052. if (const auto optType = TOptionalTypeInspector(*typeInfoHelper, argType)) {
  1053. argType = optType.GetItemType();
  1054. }
  1055. TResourceTypeInspector resource(*typeInfoHelper, argType);
  1056. if (!resource) {
  1057. TDataTypeInspector data(*typeInfoHelper, argType);
  1058. if (!data) {
  1059. builder.SetError("Data type expected");
  1060. return true;
  1061. }
  1062. const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features;
  1063. if (features & NUdf::BigDateType) {
  1064. BuildSignature<TM64ResourceName, WAccessor>(builder, typesOnly);
  1065. return true;
  1066. }
  1067. if (features & (NUdf::DateType | NUdf::TzDateType)) {
  1068. BuildSignature<TMResourceName, Accessor>(builder, typesOnly);
  1069. return true;
  1070. }
  1071. ::TStringBuilder sb;
  1072. sb << "Invalid argument type: got ";
  1073. TTypePrinter(*typeInfoHelper, argType).Out(sb.Out);
  1074. sb << ", but Resource<" << TMResourceName <<"> or Resource<"
  1075. << TM64ResourceName << "> expected";
  1076. builder.SetError(sb);
  1077. return true;
  1078. }
  1079. if (resource.GetTag() == TStringRef::Of(TM64ResourceName)) {
  1080. BuildSignature<TM64ResourceName, WAccessor>(builder, typesOnly);
  1081. return true;
  1082. }
  1083. if (resource.GetTag() == TStringRef::Of(TMResourceName)) {
  1084. BuildSignature<TMResourceName, Accessor>(builder, typesOnly);
  1085. return true;
  1086. }
  1087. builder.SetError("Unexpected Resource tag");
  1088. return true;
  1089. }
  1090. private:
  1091. template<auto Func>
  1092. class TImpl : public TBoxedValue {
  1093. public:
  1094. TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final {
  1095. EMPTY_RESULT_ON_EMPTY_ARG(0);
  1096. return Func(valueBuilder, args[0]);
  1097. }
  1098. };
  1099. template<const char* TResourceName, auto Func>
  1100. static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) {
  1101. builder.Returns<char*>();
  1102. builder.Args()->Add<TAutoMap<TResource<TResourceName>>>();
  1103. builder.IsStrict();
  1104. if (!typesOnly) {
  1105. builder.Implementation(new TImpl<Func>());
  1106. }
  1107. }
  1108. };
  1109. // template<typename TValue>
  1110. // TValue GetMonthNameValue(size_t idx) {
  1111. // static const std::array<TValue, 12U> monthNames = {{
  1112. // TValue::Embedded(TStringRef::Of("January")),
  1113. // TValue::Embedded(TStringRef::Of("February")),
  1114. // TValue::Embedded(TStringRef::Of("March")),
  1115. // TValue::Embedded(TStringRef::Of("April")),
  1116. // TValue::Embedded(TStringRef::Of("May")),
  1117. // TValue::Embedded(TStringRef::Of("June")),
  1118. // TValue::Embedded(TStringRef::Of("July")),
  1119. // TValue::Embedded(TStringRef::Of("August")),
  1120. // TValue::Embedded(TStringRef::Of("September")),
  1121. // TValue::Embedded(TStringRef::Of("October")),
  1122. // TValue::Embedded(TStringRef::Of("November")),
  1123. // TValue::Embedded(TStringRef::Of("December"))
  1124. // }};
  1125. // return monthNames.at(idx);
  1126. // }
  1127. // struct TGetMonthNameKernelExec : TUnaryKernelExec<TGetMonthNameKernelExec, TReaderTraits::TResource<true>, TStringArrayBuilder<arrow::StringType, false>> {
  1128. // template<typename TSink>
  1129. // static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
  1130. // Y_UNUSED(valueBuilder);
  1131. // sink(GetMonthNameValue<TBlockItem>(GetMonth(item) - 1U));
  1132. // }
  1133. // };
  1134. // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetMonthName, char*(TAutoMap<TResource<TMResourceName>>)) {
  1135. // Y_UNUSED(valueBuilder);
  1136. // return GetMonthNameValue<TUnboxedValue>(GetMonth(*args) - 1U);
  1137. // }
  1138. // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetMonthName, TGetMonthNameKernelExec::Do, arrow::compute::NullHandling::INTERSECTION);
  1139. template<const char* TResourceName>
  1140. TUnboxedValue GetMonthName(const IValueBuilder* valueBuilder, const TUnboxedValuePod& arg) {
  1141. Y_UNUSED(valueBuilder);
  1142. static const std::array<TUnboxedValue, 12U> monthNames = {{
  1143. TUnboxedValuePod::Embedded(TStringRef::Of("January")),
  1144. TUnboxedValuePod::Embedded(TStringRef::Of("February")),
  1145. TUnboxedValuePod::Embedded(TStringRef::Of("March")),
  1146. TUnboxedValuePod::Embedded(TStringRef::Of("April")),
  1147. TUnboxedValuePod::Embedded(TStringRef::Of("May")),
  1148. TUnboxedValuePod::Embedded(TStringRef::Of("June")),
  1149. TUnboxedValuePod::Embedded(TStringRef::Of("July")),
  1150. TUnboxedValuePod::Embedded(TStringRef::Of("August")),
  1151. TUnboxedValuePod::Embedded(TStringRef::Of("September")),
  1152. TUnboxedValuePod::Embedded(TStringRef::Of("October")),
  1153. TUnboxedValuePod::Embedded(TStringRef::Of("November")),
  1154. TUnboxedValuePod::Embedded(TStringRef::Of("December"))
  1155. }};
  1156. return monthNames.at(GetMonth<TResourceName>(arg) - 1U);
  1157. }
  1158. // struct TGetDayOfMonthKernelExec : TUnaryKernelExec<TGetMonthNameKernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<ui8, false>> {
  1159. // template<typename TSink>
  1160. // static void Process(TBlockItem item, const TSink& sink) {
  1161. // sink(GetDay(item));
  1162. // }
  1163. // };
  1164. // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetDayOfMonth, ui8(TAutoMap<TResource<TMResourceName>>)) {
  1165. // Y_UNUSED(valueBuilder);
  1166. // return TUnboxedValuePod(GetDay(args[0]));
  1167. // }
  1168. // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetDayOfMonth, TGetDayOfMonthKernelExec::Do, arrow::compute::NullHandling::INTERSECTION);
  1169. template<const char* TResourceName>
  1170. TUnboxedValue GetDayOfWeekName(const IValueBuilder* valueBuilder, const TUnboxedValuePod& arg) {
  1171. Y_UNUSED(valueBuilder);
  1172. static const std::array<TUnboxedValue, 7U> dayNames = {{
  1173. TUnboxedValuePod::Embedded(TStringRef::Of("Monday")),
  1174. TUnboxedValuePod::Embedded(TStringRef::Of("Tuesday")),
  1175. TUnboxedValuePod::Embedded(TStringRef::Of("Wednesday")),
  1176. TUnboxedValuePod::Embedded(TStringRef::Of("Thursday")),
  1177. TUnboxedValuePod::Embedded(TStringRef::Of("Friday")),
  1178. TUnboxedValuePod::Embedded(TStringRef::Of("Saturday")),
  1179. TUnboxedValuePod::Embedded(TStringRef::Of("Sunday"))
  1180. }};
  1181. return dayNames.at(GetDayOfWeek<TResourceName>(arg) - 1U);
  1182. }
  1183. // struct TGetDayOfWeekNameKernelExec : TUnaryKernelExec<TGetDayOfWeekNameKernelExec, TReaderTraits::TResource<true>, TStringArrayBuilder<arrow::StringType, false>> {
  1184. // template<typename TSink>
  1185. // static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
  1186. // Y_UNUSED(valueBuilder);
  1187. // sink(GetDayNameValue<TBlockItem>(GetDayOfWeek(item) - 1U));
  1188. // }
  1189. // };
  1190. // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetDayOfWeekName, char*(TAutoMap<TResource<TMResourceName>>)) {
  1191. // Y_UNUSED(valueBuilder);
  1192. // return GetDayNameValue<TUnboxedValuePod>(GetDayOfWeek(*args) - 1U);
  1193. // }
  1194. // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetDayOfWeekName, TGetDayOfWeekNameKernelExec::Do, arrow::compute::NullHandling::INTERSECTION);
  1195. struct TTGetTimezoneNameKernelExec : TUnaryKernelExec<TTGetTimezoneNameKernelExec, TReaderTraits::TResource<false>, TStringArrayBuilder<arrow::BinaryType, false>> {
  1196. template<typename TSink>
  1197. static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
  1198. Y_UNUSED(valueBuilder);
  1199. auto timezoneId = GetTimezoneId(item);
  1200. if (timezoneId >= NUdf::GetTimezones().size()) {
  1201. sink(TBlockItem{});
  1202. } else {
  1203. sink(TBlockItem{NUdf::GetTimezones()[timezoneId]});
  1204. }
  1205. }
  1206. };
  1207. BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetTimezoneName, char*(TAutoMap<TResource<TMResourceName>>)) {
  1208. auto timezoneId = GetTimezoneId(args[0]);
  1209. if (timezoneId >= NUdf::GetTimezones().size()) {
  1210. return TUnboxedValuePod();
  1211. }
  1212. return valueBuilder->NewString(NUdf::GetTimezones()[timezoneId]);
  1213. }
  1214. END_SIMPLE_ARROW_UDF(TGetTimezoneName, TTGetTimezoneNameKernelExec::Do);
  1215. template<const char* TResourceName>
  1216. TUnboxedValue GetTimezoneName(const IValueBuilder* valueBuilder, const TUnboxedValuePod& arg) {
  1217. const ui16 tzId = GetTimezoneId<TResourceName>(arg);
  1218. const auto& tzNames = NUdf::GetTimezones();
  1219. if (tzId >= tzNames.size()) {
  1220. return TUnboxedValuePod();
  1221. }
  1222. return valueBuilder->NewString(tzNames[tzId]);
  1223. }
  1224. // Update
  1225. class TUpdate : public TBoxedValue {
  1226. public:
  1227. typedef bool TTypeAwareMarker;
  1228. static const TStringRef& Name() {
  1229. static auto name = TStringRef::Of("Update");
  1230. return name;
  1231. }
  1232. static bool DeclareSignature(
  1233. const TStringRef& name,
  1234. TType* userType,
  1235. IFunctionTypeInfoBuilder& builder,
  1236. bool typesOnly)
  1237. {
  1238. if (Name() != name) {
  1239. return false;
  1240. }
  1241. if (!userType) {
  1242. builder.SetError("User type is missing");
  1243. return true;
  1244. }
  1245. builder.UserType(userType);
  1246. const auto typeInfoHelper = builder.TypeInfoHelper();
  1247. TTupleTypeInspector tuple(*typeInfoHelper, userType);
  1248. Y_ENSURE(tuple, "Tuple with args and options tuples expected");
  1249. Y_ENSURE(tuple.GetElementsCount() > 0,
  1250. "Tuple has to contain positional arguments");
  1251. TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0));
  1252. Y_ENSURE(argsTuple, "Tuple with args expected");
  1253. if (argsTuple.GetElementsCount() == 0) {
  1254. builder.SetError("At least one argument expected");
  1255. return true;
  1256. }
  1257. auto argType = argsTuple.GetElementType(0);
  1258. if (const auto optType = TOptionalTypeInspector(*typeInfoHelper, argType)) {
  1259. argType = optType.GetItemType();
  1260. }
  1261. TResourceTypeInspector resource(*typeInfoHelper, argType);
  1262. if (!resource) {
  1263. TDataTypeInspector data(*typeInfoHelper, argType);
  1264. if (!data) {
  1265. SetInvalidTypeError(builder, typeInfoHelper, argType);
  1266. return true;
  1267. }
  1268. const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features;
  1269. if (features & NUdf::BigDateType) {
  1270. BuildSignature<TM64ResourceName>(builder, typesOnly);
  1271. return true;
  1272. }
  1273. if (features & (NUdf::DateType | NUdf::TzDateType)) {
  1274. BuildSignature<TMResourceName>(builder, typesOnly);
  1275. return true;
  1276. }
  1277. SetInvalidTypeError(builder, typeInfoHelper, argType);
  1278. return true;
  1279. }
  1280. if (resource.GetTag() == TStringRef::Of(TM64ResourceName)) {
  1281. BuildSignature<TM64ResourceName>(builder, typesOnly);
  1282. return true;
  1283. }
  1284. if (resource.GetTag() == TStringRef::Of(TMResourceName)) {
  1285. BuildSignature<TMResourceName>(builder, typesOnly);
  1286. return true;
  1287. }
  1288. ::TStringBuilder sb;
  1289. sb << "Unexpected Resource tag: got '" << resource.GetTag() << "'";
  1290. builder.SetError(sb);
  1291. return true;
  1292. }
  1293. private:
  1294. template<const char* TResourceName>
  1295. class TImpl : public TBoxedValue {
  1296. public:
  1297. TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final {
  1298. try {
  1299. EMPTY_RESULT_ON_EMPTY_ARG(0);
  1300. auto result = args[0];
  1301. if (args[1]) {
  1302. auto year = args[1].Get<std::conditional_t<TResourceName == TMResourceName, ui16, i32>>();
  1303. if (!ValidateYear<TResourceName>(year)) {
  1304. return TUnboxedValuePod();
  1305. }
  1306. SetYear<TResourceName>(result, year);
  1307. }
  1308. if (args[2]) {
  1309. auto month = args[2].Get<ui8>();
  1310. if (!ValidateMonth(month)) {
  1311. return TUnboxedValuePod();
  1312. }
  1313. SetMonth<TResourceName>(result, month);
  1314. }
  1315. if (args[3]) {
  1316. auto day = args[3].Get<ui8>();
  1317. if (!ValidateDay(day)) {
  1318. return TUnboxedValuePod();
  1319. }
  1320. SetDay<TResourceName>(result, day);
  1321. }
  1322. if (args[4]) {
  1323. auto hour = args[4].Get<ui8>();
  1324. if (!ValidateHour(hour)) {
  1325. return TUnboxedValuePod();
  1326. }
  1327. SetHour<TResourceName>(result, hour);
  1328. }
  1329. if (args[5]) {
  1330. auto minute = args[5].Get<ui8>();
  1331. if (!ValidateMinute(minute)) {
  1332. return TUnboxedValuePod();
  1333. }
  1334. SetMinute<TResourceName>(result, minute);
  1335. }
  1336. if (args[6]) {
  1337. auto second = args[6].Get<ui8>();
  1338. if (!ValidateSecond(second)) {
  1339. return TUnboxedValuePod();
  1340. }
  1341. SetSecond<TResourceName>(result, second);
  1342. }
  1343. if (args[7]) {
  1344. auto microsecond = args[7].Get<ui32>();
  1345. if (!ValidateMicrosecond(microsecond)) {
  1346. return TUnboxedValuePod();
  1347. }
  1348. SetMicrosecond<TResourceName>(result, microsecond);
  1349. }
  1350. if (args[8]) {
  1351. auto timezoneId = args[8].Get<ui16>();
  1352. if (!ValidateTimezoneId(timezoneId)) {
  1353. return TUnboxedValuePod();
  1354. }
  1355. SetTimezoneId<TResourceName>(result, timezoneId);
  1356. }
  1357. auto& builder = valueBuilder->GetDateBuilder();
  1358. auto& storage = Reference<TResourceName>(result);
  1359. if (!storage.Validate(builder)) {
  1360. return TUnboxedValuePod();
  1361. }
  1362. return result;
  1363. } catch (const std::exception& e) {
  1364. TStringBuilder sb;
  1365. sb << CurrentExceptionMessage();
  1366. sb << Endl << "[" << TStringBuf(Name()) << "]" ;
  1367. UdfTerminate(sb.c_str());
  1368. }
  1369. }
  1370. };
  1371. static void SetInvalidTypeError(NUdf::IFunctionTypeInfoBuilder& builder,
  1372. ITypeInfoHelper::TPtr typeInfoHelper, const TType* argType)
  1373. {
  1374. ::TStringBuilder sb;
  1375. sb << "Invalid argument type: got ";
  1376. TTypePrinter(*typeInfoHelper, argType).Out(sb.Out);
  1377. sb << ", but Resource<" << TMResourceName <<"> or Resource<"
  1378. << TM64ResourceName << "> expected";
  1379. builder.SetError(sb);
  1380. }
  1381. template<const char* TResourceName>
  1382. static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) {
  1383. builder.Returns<TOptional<TResource<TResourceName>>>();
  1384. builder.OptionalArgs(8).Args()->Add<TAutoMap<TResource<TResourceName>>>()
  1385. .template Add<TOptional<std::conditional_t<TResourceName == TMResourceName, ui16, i32>>>().Name("Year")
  1386. .template Add<TOptional<ui8>>().Name("Month")
  1387. .template Add<TOptional<ui8>>().Name("Day")
  1388. .template Add<TOptional<ui8>>().Name("Hour")
  1389. .template Add<TOptional<ui8>>().Name("Minute")
  1390. .template Add<TOptional<ui8>>().Name("Second")
  1391. .template Add<TOptional<ui32>>().Name("Microsecond")
  1392. .template Add<TOptional<ui16>>().Name("TimezoneId");
  1393. builder.IsStrict();
  1394. if (!typesOnly) {
  1395. builder.Implementation(new TImpl<TResourceName>());
  1396. }
  1397. }
  1398. };
  1399. // From*
  1400. template<typename TInput, typename TOutput, i64 UsecMultiplier>
  1401. inline TUnboxedValuePod TFromConverter(TInput arg) {
  1402. using TLayout = TDataType<TOutput>::TLayout;
  1403. const TLayout usec = TLayout(arg) * UsecMultiplier;
  1404. return Validate<TOutput>(usec) ? TUnboxedValuePod(usec) : TUnboxedValuePod();
  1405. }
  1406. template<typename TInput, typename TOutput, i64 UsecMultiplier>
  1407. using TFromConverterKernel = TUnaryUnsafeFixedSizeFilterKernel<TInput,
  1408. typename TDataType<TOutput>::TLayout, [] (TInput arg) {
  1409. using TLayout = TDataType<TOutput>::TLayout;
  1410. const TLayout usec = TLayout(arg) * UsecMultiplier;
  1411. return std::make_pair(usec, Validate<TOutput>(usec));
  1412. }>;
  // Declares the UDF T<name>: Optional<retType>(AutoMap<argType>).
  // Scalar path: TFromConverter<argType, retType, usecMultiplier>.
  // Block path:  TFromConverterKernel<argType, retType, usecMultiplier>::Do.
  #define DATETIME_FROM_CONVERTER_UDF(name, retType, argType, usecMultiplier)                     \
      BEGIN_SIMPLE_STRICT_ARROW_UDF(T##name, TOptional<retType>(TAutoMap<argType>)) {             \
          Y_UNUSED(valueBuilder);                                                                 \
          const auto units = args[0].Get<argType>();                                              \
          return TFromConverter<argType, retType, usecMultiplier>(units);                         \
      }                                                                                           \
                                                                                                  \
      END_SIMPLE_ARROW_UDF(T##name, (TFromConverterKernel<argType, retType, usecMultiplier>::Do))
  1420. DATETIME_FROM_CONVERTER_UDF(FromSeconds, TTimestamp, ui32, UsecondsInSecond);
  1421. DATETIME_FROM_CONVERTER_UDF(FromMilliseconds, TTimestamp, ui64, UsecondsInMilliseconds);
  1422. DATETIME_FROM_CONVERTER_UDF(FromMicroseconds, TTimestamp, ui64, 1);
  1423. DATETIME_FROM_CONVERTER_UDF(FromSeconds64, TTimestamp64, i64, UsecondsInSecond);
  1424. DATETIME_FROM_CONVERTER_UDF(FromMilliseconds64, TTimestamp64, i64, UsecondsInMilliseconds);
  1425. DATETIME_FROM_CONVERTER_UDF(FromMicroseconds64, TTimestamp64, i64, 1);
  1426. DATETIME_FROM_CONVERTER_UDF(IntervalFromDays, TInterval, i32, UsecondsInDay);
  1427. DATETIME_FROM_CONVERTER_UDF(IntervalFromHours, TInterval, i32, UsecondsInHour);
  1428. DATETIME_FROM_CONVERTER_UDF(IntervalFromMinutes, TInterval, i32, UsecondsInMinute);
  1429. DATETIME_FROM_CONVERTER_UDF(IntervalFromSeconds, TInterval, i32, UsecondsInSecond);
  1430. DATETIME_FROM_CONVERTER_UDF(IntervalFromMilliseconds, TInterval, i64, UsecondsInMilliseconds);
  1431. DATETIME_FROM_CONVERTER_UDF(IntervalFromMicroseconds, TInterval, i64, 1);
  1432. DATETIME_FROM_CONVERTER_UDF(Interval64FromDays, TInterval64, i32, UsecondsInDay);
  1433. DATETIME_FROM_CONVERTER_UDF(Interval64FromHours, TInterval64, i64, UsecondsInHour);
  1434. DATETIME_FROM_CONVERTER_UDF(Interval64FromMinutes, TInterval64, i64, UsecondsInMinute);
  1435. DATETIME_FROM_CONVERTER_UDF(Interval64FromSeconds, TInterval64, i64, UsecondsInSecond);
  1436. DATETIME_FROM_CONVERTER_UDF(Interval64FromMilliseconds, TInterval64, i64, UsecondsInMilliseconds);
  1437. DATETIME_FROM_CONVERTER_UDF(Interval64FromMicroseconds, TInterval64, i64, 1);
  1438. // To*
  1439. BEGIN_SIMPLE_STRICT_ARROW_UDF(TToDays, i32(TAutoMap<TInterval>)) {
  1440. Y_UNUSED(valueBuilder);
  1441. return TUnboxedValuePod(i32(args[0].Get<i64>() / UsecondsInDay));
  1442. }
  1443. END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TToDays,
  1444. (UnaryPreallocatedExecImpl<i64, i32, [] (i64 arg) { return i32(arg / UsecondsInDay); }>),
  1445. arrow::compute::NullHandling::INTERSECTION);
  1446. BEGIN_SIMPLE_STRICT_ARROW_UDF(TToHours, i32(TAutoMap<TInterval>)) {
  1447. Y_UNUSED(valueBuilder);
  1448. return TUnboxedValuePod(i32(args[0].Get<i64>() / UsecondsInHour));
  1449. }
  1450. END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TToHours,
  1451. (UnaryPreallocatedExecImpl<i64, i32, [] (i64 arg) { return i32(arg / UsecondsInHour); }>),
  1452. arrow::compute::NullHandling::INTERSECTION);
  1453. BEGIN_SIMPLE_STRICT_ARROW_UDF(TToMinutes, i32(TAutoMap<TInterval>)) {
  1454. Y_UNUSED(valueBuilder);
  1455. return TUnboxedValuePod(i32(args[0].Get<i64>() / UsecondsInMinute));
  1456. }
  1457. END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TToMinutes,
  1458. (UnaryPreallocatedExecImpl<i64, i32, [] (i64 arg) { return i32(arg / UsecondsInMinute); }>),
  1459. arrow::compute::NullHandling::INTERSECTION);
  1460. // StartOf*
  1461. template<auto Core>
  1462. struct TStartOfKernelExec : TUnaryKernelExec<TStartOfKernelExec<Core>, TResourceBlockReader<false>, TResourceArrayBuilder<true>> {
  1463. template<typename TSink>
  1464. static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
  1465. if (auto res = Core(Reference(item), *valueBuilder)) {
  1466. Reference(item) = res.GetRef();
  1467. sink(item);
  1468. } else {
  1469. sink(TBlockItem{});
  1470. }
  1471. }
  1472. };
  1473. template<const char* TResourceName, auto Core>
  1474. TUnboxedValue SimpleDatetimeToDatetimeUdf(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) {
  1475. auto result = args[0];
  1476. auto& storage = Reference<TResourceName>(result);
  1477. if (auto res = Core(storage, *valueBuilder)) {
  1478. storage = res.GetRef();
  1479. return result;
  1480. }
  1481. return TUnboxedValuePod{};
  1482. }
  1483. template<const char* TUdfName, auto Boundary, auto WBoundary>
  1484. class TBoundaryOf: public ::NYql::NUdf::TBoxedValue {
  1485. public:
  1486. typedef bool TTypeAwareMarker;
  1487. static const ::NYql::NUdf::TStringRef& Name() {
  1488. static auto name = TStringRef(TUdfName, std::strlen(TUdfName));
  1489. return name;
  1490. }
  1491. static bool DeclareSignature(
  1492. const ::NYql::NUdf::TStringRef& name,
  1493. ::NYql::NUdf::TType* userType,
  1494. ::NYql::NUdf::IFunctionTypeInfoBuilder& builder,
  1495. bool typesOnly)
  1496. {
  1497. if (Name() != name) {
  1498. return false;
  1499. }
  1500. if (!userType) {
  1501. builder.SetError("User type is missing");
  1502. return true;
  1503. }
  1504. builder.UserType(userType);
  1505. const auto typeInfoHelper = builder.TypeInfoHelper();
  1506. TTupleTypeInspector tuple(*typeInfoHelper, userType);
  1507. Y_ENSURE(tuple, "Tuple with args and options tuples expected");
  1508. Y_ENSURE(tuple.GetElementsCount() > 0,
  1509. "Tuple has to contain positional arguments");
  1510. TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0));
  1511. Y_ENSURE(argsTuple, "Tuple with args expected");
  1512. if (argsTuple.GetElementsCount() != 1) {
  1513. builder.SetError("Single argument expected");
  1514. return true;
  1515. }
  1516. auto argType = argsTuple.GetElementType(0);
  1517. if (const auto optType = TOptionalTypeInspector(*typeInfoHelper, argType)) {
  1518. argType = optType.GetItemType();
  1519. }
  1520. TResourceTypeInspector resource(*typeInfoHelper, argType);
  1521. if (!resource) {
  1522. TDataTypeInspector data(*typeInfoHelper, argType);
  1523. if (!data) {
  1524. SetInvalidTypeError(builder, typeInfoHelper, argType);
  1525. return true;
  1526. }
  1527. const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features;
  1528. if (features & NUdf::BigDateType) {
  1529. BuildSignature<TM64ResourceName, WBoundary>(builder, typesOnly);
  1530. return true;
  1531. }
  1532. if (features & (NUdf::DateType | NUdf::TzDateType)) {
  1533. BuildSignature<TMResourceName, Boundary>(builder, typesOnly);
  1534. return true;
  1535. }
  1536. SetInvalidTypeError(builder, typeInfoHelper, argType);
  1537. return true;
  1538. }
  1539. if (resource.GetTag() == TStringRef::Of(TM64ResourceName)) {
  1540. BuildSignature<TM64ResourceName, WBoundary>(builder, typesOnly);
  1541. return true;
  1542. }
  1543. if (resource.GetTag() == TStringRef::Of(TMResourceName)) {
  1544. BuildSignature<TMResourceName, Boundary>(builder, typesOnly);
  1545. return true;
  1546. }
  1547. ::TStringBuilder sb;
  1548. sb << "Unexpected Resource tag: got '" << resource.GetTag() << "'";
  1549. builder.SetError(sb);
  1550. return true;
  1551. }
  1552. private:
  1553. template<auto Func>
  1554. class TImpl : public TBoxedValue {
  1555. public:
  1556. TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final {
  1557. try {
  1558. return Func(valueBuilder, args);
  1559. } catch (const std::exception&) {
  1560. TStringBuilder sb;
  1561. sb << CurrentExceptionMessage();
  1562. sb << Endl << "[" << TStringBuf(Name()) << "]" ;
  1563. UdfTerminate(sb.c_str());
  1564. }
  1565. }
  1566. };
  1567. static void SetInvalidTypeError(NUdf::IFunctionTypeInfoBuilder& builder,
  1568. ITypeInfoHelper::TPtr typeInfoHelper, const TType* argType)
  1569. {
  1570. ::TStringBuilder sb;
  1571. sb << "Invalid argument type: got ";
  1572. TTypePrinter(*typeInfoHelper, argType).Out(sb.Out);
  1573. sb << ", but Resource<" << TMResourceName <<"> or Resource<"
  1574. << TM64ResourceName << "> expected";
  1575. builder.SetError(sb);
  1576. }
  1577. template< const char* TResourceName, auto Func>
  1578. static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) {
  1579. builder.Returns<TOptional<TResource<TResourceName>>>();
  1580. builder.Args()->Add<TAutoMap<TResource<TResourceName>>>();
  1581. builder.IsStrict();
  1582. if (!typesOnly) {
  1583. builder.Implementation(new TImpl<Func>());
  1584. }
  1585. }
  1586. };
  1587. template<typename TStorage>
  1588. void SetStartOfDay(TStorage& storage) {
  1589. storage.Hour = 0;
  1590. storage.Minute = 0;
  1591. storage.Second = 0;
  1592. storage.Microsecond = 0;
  1593. }
  1594. template<typename TStorage>
  1595. void SetEndOfDay(TStorage& storage) {
  1596. storage.Hour = 23;
  1597. storage.Minute = 59;
  1598. storage.Second = 59;
  1599. storage.Microsecond = 999999;
  1600. }
  1601. template<typename TStorage>
  1602. TMaybe<TStorage> StartOfYear(TStorage storage, const IValueBuilder& valueBuilder) {
  1603. storage.Month = 1;
  1604. storage.Day = 1;
  1605. SetStartOfDay(storage);
  1606. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1607. return {};
  1608. }
  1609. return storage;
  1610. }
  1611. template<typename TStorage>
  1612. TMaybe<TStorage> EndOfYear(TStorage storage, const IValueBuilder& valueBuilder) {
  1613. storage.Month = 12;
  1614. storage.Day = 31;
  1615. SetEndOfDay(storage);
  1616. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1617. return {};
  1618. }
  1619. return storage;
  1620. }
  1621. template<typename TStorage>
  1622. TMaybe<TStorage> StartOfQuarter(TStorage storage, const IValueBuilder& valueBuilder) {
  1623. storage.Month = (storage.Month - 1) / 3 * 3 + 1;
  1624. storage.Day = 1;
  1625. SetStartOfDay(storage);
  1626. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1627. return {};
  1628. }
  1629. return storage;
  1630. }
  1631. template<typename TStorage>
  1632. TMaybe<TStorage> EndOfQuarter(TStorage storage, const IValueBuilder& valueBuilder) {
  1633. storage.Month = ((storage.Month - 1) / 3 + 1) * 3;
  1634. storage.Day = NMiniKQL::GetMonthLength(storage.Month, NMiniKQL::IsLeapYear(storage.Year));
  1635. SetEndOfDay(storage);
  1636. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1637. return {};
  1638. }
  1639. return storage;
  1640. }
  1641. template<typename TStorage>
  1642. TMaybe<TStorage> StartOfMonth(TStorage storage, const IValueBuilder& valueBuilder) {
  1643. storage.Day = 1;
  1644. SetStartOfDay(storage);
  1645. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1646. return {};
  1647. }
  1648. return storage;
  1649. }
  1650. template<typename TStorage>
  1651. TMaybe<TStorage> EndOfMonth(TStorage storage, const IValueBuilder& valueBuilder) {
  1652. storage.Day = NMiniKQL::GetMonthLength(storage.Month, NMiniKQL::IsLeapYear(storage.Year));
  1653. SetEndOfDay(storage);
  1654. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1655. return {};
  1656. }
  1657. return storage;
  1658. }
  1659. template<typename TStorage>
  1660. TMaybe<TStorage> StartOfWeek(TStorage storage, const IValueBuilder& valueBuilder) {
  1661. const ui32 shift = 86400u * (storage.DayOfWeek - 1u);
  1662. if constexpr (std::is_same_v<TStorage, TTMStorage>) {
  1663. if (shift > storage.ToDatetime(valueBuilder.GetDateBuilder())) {
  1664. return {};
  1665. }
  1666. storage.FromDatetime(valueBuilder.GetDateBuilder(), storage.ToDatetime(valueBuilder.GetDateBuilder()) - shift, storage.TimezoneId);
  1667. } else {
  1668. if (shift > storage.ToDatetime64(valueBuilder.GetDateBuilder())) {
  1669. return {};
  1670. }
  1671. storage.FromDatetime64(valueBuilder.GetDateBuilder(), storage.ToDatetime64(valueBuilder.GetDateBuilder()) - shift, storage.TimezoneId);
  1672. }
  1673. SetStartOfDay(storage);
  1674. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1675. return {};
  1676. }
  1677. return storage;
  1678. }
  1679. template<typename TStorage>
  1680. TMaybe<TStorage> EndOfWeek(TStorage storage, const IValueBuilder& valueBuilder) {
  1681. const ui32 shift = 86400u * (7u - storage.DayOfWeek);
  1682. if constexpr (std::is_same_v<TStorage, TTMStorage>) {
  1683. auto dt = storage.ToDatetime(valueBuilder.GetDateBuilder());
  1684. if (NUdf::MAX_DATETIME - shift <= dt) {
  1685. return {};
  1686. }
  1687. storage.FromDatetime(valueBuilder.GetDateBuilder(), dt + shift, storage.TimezoneId);
  1688. } else {
  1689. auto dt = storage.ToDatetime64(valueBuilder.GetDateBuilder());
  1690. if (NUdf::MAX_DATETIME64 - shift <= dt) {
  1691. return {};
  1692. }
  1693. storage.FromDatetime64(valueBuilder.GetDateBuilder(), dt + shift, storage.TimezoneId);
  1694. }
  1695. SetEndOfDay(storage);
  1696. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1697. return {};
  1698. }
  1699. return storage;
  1700. }
  1701. template<typename TStorage>
  1702. TMaybe<TStorage> StartOfDay(TStorage storage, const IValueBuilder& valueBuilder) {
  1703. SetStartOfDay(storage);
  1704. auto& builder = valueBuilder.GetDateBuilder();
  1705. if (!storage.Validate(builder)) {
  1706. return {};
  1707. }
  1708. return storage;
  1709. }
  1710. template<typename TStorage>
  1711. TMaybe<TStorage> EndOfDay(TStorage storage, const IValueBuilder& valueBuilder) {
  1712. SetEndOfDay(storage);
  1713. auto& builder = valueBuilder.GetDateBuilder();
  1714. if (!storage.Validate(builder)) {
  1715. return {};
  1716. }
  1717. return storage;
  1718. }
  1719. TMaybe<TTMStorage> StartOf(TTMStorage storage, ui64 interval, const IValueBuilder& valueBuilder) {
  1720. if (interval >= 86400000000ull) {
  1721. // treat as StartOfDay
  1722. SetStartOfDay(storage);
  1723. } else {
  1724. auto current = storage.ToTimeOfDay();
  1725. auto rounded = current / interval * interval;
  1726. storage.FromTimeOfDay(rounded);
  1727. }
  1728. auto& builder = valueBuilder.GetDateBuilder();
  1729. if (!storage.Validate(builder)) {
  1730. return {};
  1731. }
  1732. return storage;
  1733. }
  1734. TMaybe<TTMStorage> EndOf(TTMStorage storage, ui64 interval, const IValueBuilder& valueBuilder) {
  1735. if (interval >= 86400000000ull) {
  1736. // treat as EndOfDay
  1737. SetEndOfDay(storage);
  1738. } else {
  1739. auto current = storage.ToTimeOfDay();
  1740. auto rounded = current / interval * interval + interval - 1;
  1741. storage.FromTimeOfDay(rounded);
  1742. }
  1743. auto& builder = valueBuilder.GetDateBuilder();
  1744. if (!storage.Validate(builder)) {
  1745. return {};
  1746. }
  1747. return storage;
  1748. }
  1749. template<bool UseEnd>
  1750. struct TStartEndOfBinaryKernelExec : TBinaryKernelExec<TStartEndOfBinaryKernelExec<UseEnd>> {
  1751. template<typename TSink>
  1752. static void Process(const IValueBuilder* valueBuilder, TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
  1753. auto& storage = Reference(arg1);
  1754. ui64 interval = std::abs(arg2.Get<i64>());
  1755. if (interval == 0) {
  1756. sink(arg1);
  1757. return;
  1758. }
  1759. if (auto res = (UseEnd ? EndOf : StartOf)(storage, interval, *valueBuilder)) {
  1760. storage = res.GetRef();
  1761. sink(arg1);
  1762. } else {
  1763. sink(TBlockItem{});
  1764. }
  1765. }
  1766. };
  1767. BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOf, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>, TAutoMap<TInterval>)) {
  1768. auto result = args[0];
  1769. ui64 interval = std::abs(args[1].Get<i64>());
  1770. if (interval == 0) {
  1771. return result;
  1772. }
  1773. if (auto res = StartOf(Reference(result), interval, *valueBuilder)) {
  1774. Reference(result) = res.GetRef();
  1775. return result;
  1776. }
  1777. return TUnboxedValuePod{};
  1778. }
  1779. END_SIMPLE_ARROW_UDF(TStartOf, TStartEndOfBinaryKernelExec<false>::Do);
  1780. BEGIN_SIMPLE_STRICT_ARROW_UDF(TEndOf, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>, TAutoMap<TInterval>)) {
  1781. auto result = args[0];
  1782. ui64 interval = std::abs(args[1].Get<i64>());
  1783. if (interval == 0) {
  1784. return result;
  1785. }
  1786. if (auto res = EndOf(Reference(result), interval, *valueBuilder)) {
  1787. Reference(result) = res.GetRef();
  1788. return result;
  1789. }
  1790. return TUnboxedValuePod{};
  1791. }
  1792. END_SIMPLE_ARROW_UDF(TEndOf, TStartEndOfBinaryKernelExec<true>::Do);
  1793. struct TTimeOfDayKernelExec : TUnaryKernelExec<TTimeOfDayKernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<TDataType<TInterval>::TLayout, false>> {
  1794. template<typename TSink>
  1795. static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
  1796. Y_UNUSED(valueBuilder);
  1797. auto& storage = Reference(item);
  1798. sink(TBlockItem{(TDataType<TInterval>::TLayout)storage.ToTimeOfDay()});
  1799. }
  1800. };
  1801. const auto timeOfDayKernelExecDo = TTimeOfDayKernelExec::Do;
  1802. BEGIN_SIMPLE_STRICT_ARROW_UDF(TTimeOfDay, TInterval(TAutoMap<TResource<TMResourceName>>)) {
  1803. Y_UNUSED(valueBuilder);
  1804. auto& storage = Reference(args[0]);
  1805. return TUnboxedValuePod((i64)storage.ToTimeOfDay());
  1806. }
  1807. END_SIMPLE_ARROW_UDF(TTimeOfDay, timeOfDayKernelExecDo);
  1808. // Add ...
  1809. template<auto Core>
  1810. struct TAddKernelExec : TBinaryKernelExec<TAddKernelExec<Core>> {
  1811. template<typename TSink>
  1812. static void Process(const IValueBuilder* valueBuilder, TBlockItem date, TBlockItem arg, const TSink& sink) {
  1813. sink(Core(date, arg.Get<i32>(), valueBuilder->GetDateBuilder()));
  1814. }
  1815. };
  1816. template<const char* TUdfName, auto Shifter, auto WShifter>
  1817. class TShift : public TBoxedValue {
  1818. public:
  1819. typedef bool TTypeAwareMarker;
  1820. static const TStringRef& Name() {
  1821. static auto name = TStringRef(TUdfName, std::strlen(TUdfName));
  1822. return name;
  1823. }
  1824. static bool DeclareSignature(
  1825. const TStringRef& name,
  1826. TType* userType,
  1827. IFunctionTypeInfoBuilder& builder,
  1828. bool typesOnly)
  1829. {
  1830. if (Name() != name) {
  1831. return false;
  1832. }
  1833. if (!userType) {
  1834. builder.SetError("User type is missing");
  1835. return true;
  1836. }
  1837. builder.UserType(userType);
  1838. const auto typeInfoHelper = builder.TypeInfoHelper();
  1839. TTupleTypeInspector tuple(*typeInfoHelper, userType);
  1840. Y_ENSURE(tuple, "Tuple with args and options tuples expected");
  1841. Y_ENSURE(tuple.GetElementsCount() > 0,
  1842. "Tuple has to contain positional arguments");
  1843. TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0));
  1844. Y_ENSURE(argsTuple, "Tuple with args expected");
  1845. if (argsTuple.GetElementsCount() != 2) {
  1846. builder.SetError("Only two arguments expected");
  1847. return true;
  1848. }
  1849. auto argType = argsTuple.GetElementType(0);
  1850. if (const auto optType = TOptionalTypeInspector(*typeInfoHelper, argType)) {
  1851. argType = optType.GetItemType();
  1852. }
  1853. TResourceTypeInspector resource(*typeInfoHelper, argType);
  1854. if (!resource) {
  1855. TDataTypeInspector data(*typeInfoHelper, argType);
  1856. if (!data) {
  1857. SetInvalidTypeError(builder, typeInfoHelper, argType);
  1858. return true;
  1859. }
  1860. const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features;
  1861. if (features & NUdf::BigDateType) {
  1862. BuildSignature<TM64ResourceName, WShifter>(builder, typesOnly);
  1863. return true;
  1864. }
  1865. if (features & (NUdf::DateType | NUdf::TzDateType)) {
  1866. BuildSignature<TMResourceName, Shifter>(builder, typesOnly);
  1867. return true;
  1868. }
  1869. SetInvalidTypeError(builder, typeInfoHelper, argType);
  1870. return true;
  1871. }
  1872. if (resource.GetTag() == TStringRef::Of(TM64ResourceName)) {
  1873. BuildSignature<TM64ResourceName, WShifter>(builder, typesOnly);
  1874. return true;
  1875. }
  1876. if (resource.GetTag() == TStringRef::Of(TMResourceName)) {
  1877. BuildSignature<TMResourceName, Shifter>(builder, typesOnly);
  1878. return true;
  1879. }
  1880. ::TStringBuilder sb;
  1881. sb << "Unexpected Resource tag: got '" << resource.GetTag() << "'";
  1882. builder.SetError(sb);
  1883. return true;
  1884. }
  1885. private:
  1886. template<auto ShiftHanler>
  1887. class TImpl : public TBoxedValue {
  1888. public:
  1889. TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final {
  1890. return ShiftHanler(args[0], args[1].Get<i32>(), valueBuilder->GetDateBuilder());
  1891. }
  1892. };
  1893. static void SetInvalidTypeError(NUdf::IFunctionTypeInfoBuilder& builder,
  1894. ITypeInfoHelper::TPtr typeInfoHelper, const TType* argType)
  1895. {
  1896. ::TStringBuilder sb;
  1897. sb << "Invalid argument type: got ";
  1898. TTypePrinter(*typeInfoHelper, argType).Out(sb.Out);
  1899. sb << ", but Resource<" << TMResourceName <<"> or Resource<"
  1900. << TM64ResourceName << "> expected";
  1901. builder.SetError(sb);
  1902. }
  1903. template<const char* TResourceName, auto ShiftHandler>
  1904. static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) {
  1905. builder.Returns<TOptional<TResource<TResourceName>>>();
  1906. builder.Args()->Add<TAutoMap<TResource<TResourceName>>>().template Add<i32>();
  1907. builder.IsStrict();
  1908. if (!typesOnly) {
  1909. builder.Implementation(new TImpl<ShiftHandler>());
  1910. }
  1911. }
  1912. };
  1913. template<size_t Digits, bool Exacly = true>
  1914. struct PrintNDigits;
  1915. template<bool Exacly>
  1916. struct PrintNDigits<0U, Exacly> {
  1917. static constexpr ui32 Miltiplier = 1U;
  1918. template <typename T>
  1919. static constexpr size_t Do(T, char*) { return 0U; }
  1920. };
  1921. template<size_t Digits, bool Exacly>
  1922. struct PrintNDigits {
  1923. using TNextPrint = PrintNDigits<Digits - 1U, Exacly>;
  1924. static constexpr ui32 Miltiplier = TNextPrint::Miltiplier * 10U;
  1925. template <typename T>
  1926. static constexpr size_t Do(T in, char* out) {
  1927. in %= Miltiplier;
  1928. if (Exacly || in) {
  1929. *out = "0123456789"[in / TNextPrint::Miltiplier];
  1930. return 1U + TNextPrint::Do(in, ++out);
  1931. }
  1932. return 0U;
  1933. }
  1934. };
  1935. // Format
  1936. class TFormat : public TBoxedValue {
  1937. public:
  1938. explicit TFormat(TSourcePosition pos)
  1939. : Pos_(pos)
  1940. {}
  1941. static const TStringRef& Name() {
  1942. static auto name = TStringRef::Of("Format");
  1943. return name;
  1944. }
  1945. static bool DeclareSignature(
  1946. const TStringRef& name,
  1947. TType*,
  1948. IFunctionTypeInfoBuilder& builder,
  1949. bool typesOnly)
  1950. {
  1951. if (Name() != name) {
  1952. return false;
  1953. }
  1954. auto resourceType = builder.Resource(TMResourceName);
  1955. auto stringType = builder.SimpleType<char*>();
  1956. auto boolType = builder.SimpleType<bool>();
  1957. auto optionalBoolType = builder.Optional()->Item(boolType).Build();
  1958. auto args = builder.Args();
  1959. args->Add(stringType);
  1960. args->Add(optionalBoolType).Name("AlwaysWriteFractionalSeconds");
  1961. args->Done();
  1962. builder.OptionalArgs(1);
  1963. builder.Returns(
  1964. builder.Callable(1)
  1965. ->Returns(stringType)
  1966. .Arg(resourceType)
  1967. .Flags(ICallablePayload::TArgumentFlags::AutoMap)
  1968. .Build()
  1969. );
  1970. if (!typesOnly) {
  1971. builder.Implementation(new TFormat(builder.GetSourcePosition()));
  1972. }
  1973. return true;
  1974. }
  1975. private:
  1976. using TPrintersList = std::vector<std::function<size_t(char*, const TUnboxedValuePod&, const IDateBuilder&)>>;
  1977. struct TDataPrinter {
  1978. const std::string_view Data;
  1979. size_t operator()(char* out, const TUnboxedValuePod&, const IDateBuilder&) const {
  1980. std::memcpy(out, Data.data(), Data.size());
  1981. return Data.size();
  1982. }
  1983. };
  1984. TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try {
  1985. bool alwaysWriteFractionalSeconds = false;
  1986. if (auto val = args[1]) {
  1987. alwaysWriteFractionalSeconds = val.Get<bool>();
  1988. }
  1989. return TUnboxedValuePod(new TImpl(Pos_, args[0], alwaysWriteFractionalSeconds));
  1990. } catch (const std::exception& e) {
  1991. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  1992. }
  1993. class TImpl : public TBoxedValue {
  1994. public:
  1995. TUnboxedValue Run(
  1996. const IValueBuilder* valueBuilder,
  1997. const TUnboxedValuePod* args) const override
  1998. {
  1999. try {
  2000. EMPTY_RESULT_ON_EMPTY_ARG(0);
  2001. const auto value = args[0];
  2002. auto& builder = valueBuilder->GetDateBuilder();
  2003. auto result = valueBuilder->NewStringNotFilled(ReservedSize_);
  2004. auto pos = result.AsStringRef().Data();
  2005. ui32 size = 0U;
  2006. for (const auto& printer : Printers_) {
  2007. if (const auto plus = printer(pos, value, builder)) {
  2008. size += plus;
  2009. pos += plus;
  2010. }
  2011. }
  2012. if (size < ReservedSize_) {
  2013. result = valueBuilder->SubString(result.Release(), 0U, size);
  2014. }
  2015. return result;
  2016. } catch (const std::exception& e) {
  2017. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  2018. }
  2019. }
  2020. TImpl(TSourcePosition pos, TUnboxedValue format, bool alwaysWriteFractionalSeconds)
  2021. : Pos_(pos)
  2022. , Format_(format)
  2023. {
  2024. const std::string_view formatView(Format_.AsStringRef());
  2025. auto dataStart = formatView.begin();
  2026. size_t dataSize = 0U;
  2027. for (auto ptr = formatView.begin(); formatView.end() != ptr; ++ptr) {
  2028. if (*ptr != '%') {
  2029. ++dataSize;
  2030. continue;
  2031. }
  2032. if (dataSize) {
  2033. Printers_.emplace_back(TDataPrinter{std::string_view(&*dataStart, dataSize)});
  2034. ReservedSize_ += dataSize;
  2035. dataSize = 0U;
  2036. }
  2037. if (formatView.end() == ++ptr) {
  2038. ythrow yexception() << "format string ends with single %%";
  2039. }
  2040. switch (*ptr) {
  2041. case '%': {
  2042. static constexpr size_t size = 1;
  2043. Printers_.emplace_back([](char* out, const TUnboxedValuePod&, const IDateBuilder&) {
  2044. *out = '%';
  2045. return size;
  2046. });
  2047. ReservedSize_ += size;
  2048. break;
  2049. }
  2050. case 'Y': {
  2051. static constexpr size_t size = 4;
  2052. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  2053. return PrintNDigits<size>::Do(GetYear(value), out);
  2054. });
  2055. ReservedSize_ += size;
  2056. break;
  2057. }
  2058. case 'm': {
  2059. static constexpr size_t size = 2;
  2060. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  2061. return PrintNDigits<size>::Do(GetMonth(value), out);
  2062. });
  2063. ReservedSize_ += size;
  2064. break;
  2065. }
  2066. case 'd': {
  2067. static constexpr size_t size = 2;
  2068. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  2069. return PrintNDigits<size>::Do(GetDay(value), out);
  2070. });
  2071. ReservedSize_ += size;
  2072. break;
  2073. }
  2074. case 'H': {
  2075. static constexpr size_t size = 2;
  2076. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  2077. return PrintNDigits<size>::Do(GetHour(value), out);
  2078. });
  2079. ReservedSize_ += size;
  2080. break;
  2081. }
  2082. case 'M': {
  2083. static constexpr size_t size = 2;
  2084. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  2085. return PrintNDigits<size>::Do(GetMinute(value), out);
  2086. });
  2087. ReservedSize_ += size;
  2088. break;
  2089. }
  2090. case 'S':
  2091. Printers_.emplace_back([alwaysWriteFractionalSeconds](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  2092. constexpr size_t size = 2;
  2093. if (const auto microsecond = GetMicrosecond(value); microsecond || alwaysWriteFractionalSeconds) {
  2094. out += PrintNDigits<size>::Do(GetSecond(value), out);
  2095. *out++ = '.';
  2096. constexpr size_t msize = 6;
  2097. auto addSz = alwaysWriteFractionalSeconds ?
  2098. PrintNDigits<msize, true>::Do(microsecond, out) :
  2099. PrintNDigits<msize, false>::Do(microsecond, out);
  2100. return size + 1U + addSz;
  2101. }
  2102. return PrintNDigits<size>::Do(GetSecond(value), out);
  2103. });
  2104. ReservedSize_ += 9;
  2105. break;
  2106. case 'z': {
  2107. static constexpr size_t size = 5;
  2108. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder& builder) {
  2109. auto timezoneId = GetTimezoneId(value);
  2110. if (TTMStorage::IsUniversal(timezoneId)) {
  2111. std::memcpy(out, "+0000", size);
  2112. return size;
  2113. }
  2114. i32 shift;
  2115. if (!builder.GetTimezoneShift(GetYear(value), GetMonth(value), GetDay(value),
  2116. GetHour(value), GetMinute(value), GetSecond(value), timezoneId, shift))
  2117. {
  2118. std::memcpy(out, "+0000", size);
  2119. return size;
  2120. }
  2121. *out++ = shift > 0 ? '+' : '-';
  2122. shift = std::abs(shift);
  2123. out += PrintNDigits<2U>::Do(shift / 60U, out);
  2124. out += PrintNDigits<2U>::Do(shift % 60U, out);
  2125. return size;
  2126. });
  2127. ReservedSize_ += size;
  2128. break;
  2129. }
  2130. case 'Z':
  2131. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  2132. const auto timezoneId = GetTimezoneId(value);
  2133. const auto tzName = NUdf::GetTimezones()[timezoneId];
  2134. std::memcpy(out, tzName.data(), std::min(tzName.size(), MAX_TIMEZONE_NAME_LEN));
  2135. return tzName.size();
  2136. });
  2137. ReservedSize_ += MAX_TIMEZONE_NAME_LEN;
  2138. break;
  2139. case 'b': {
  2140. static constexpr size_t size = 3;
  2141. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  2142. static constexpr std::string_view mp[] {
  2143. "Jan",
  2144. "Feb",
  2145. "Mar",
  2146. "Apr",
  2147. "May",
  2148. "Jun",
  2149. "Jul",
  2150. "Aug",
  2151. "Sep",
  2152. "Oct",
  2153. "Nov",
  2154. "Dec"
  2155. };
  2156. auto month = GetMonth(value);
  2157. Y_ENSURE(month > 0 && month <= sizeof(mp) / sizeof(mp[0]), "Invalid month value");
  2158. std::memcpy(out, mp[month - 1].data(), size);
  2159. return size;
  2160. });
  2161. ReservedSize_ += size;
  2162. break;
  2163. }
  2164. case 'B': {
  2165. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  2166. static constexpr std::string_view mp[] {
  2167. "January",
  2168. "February",
  2169. "March",
  2170. "April",
  2171. "May",
  2172. "June",
  2173. "July",
  2174. "August",
  2175. "September",
  2176. "October",
  2177. "November",
  2178. "December"
  2179. };
  2180. auto month = GetMonth(value);
  2181. Y_ENSURE(month > 0 && month <= sizeof(mp) / sizeof(mp[0]), "Invalid month value");
  2182. const std::string_view monthFullName = mp[month - 1];
  2183. std::memcpy(out, monthFullName.data(), monthFullName.size());
  2184. return monthFullName.size();
  2185. });
  2186. ReservedSize_ += 9U; // MAX_MONTH_FULL_NAME_LEN
  2187. break;
  2188. }
  2189. default:
  2190. ythrow yexception() << "invalid format character: " << *ptr;
  2191. }
  2192. dataStart = ptr + 1U;
  2193. }
  2194. if (dataSize) {
  2195. Printers_.emplace_back(TDataPrinter{std::string_view(dataStart, dataSize)});
  2196. ReservedSize_ += dataSize;
  2197. }
  2198. }
  2199. private:
  2200. const TSourcePosition Pos_;
  2201. TUnboxedValue Format_;
  2202. TPrintersList Printers_{};
  2203. size_t ReservedSize_ = 0;
  2204. };
  2205. const TSourcePosition Pos_;
  2206. };
  2207. template<size_t Digits>
  2208. struct ParseExaclyNDigits;
  2209. template<>
  2210. struct ParseExaclyNDigits<0U> {
  2211. template <typename T>
  2212. static constexpr bool Do(std::string_view::const_iterator&, T&) {
  2213. return true;
  2214. }
  2215. };
  2216. template<size_t Digits>
  2217. struct ParseExaclyNDigits {
  2218. template <typename T>
  2219. static constexpr bool Do(std::string_view::const_iterator& it, T& out) {
  2220. const auto d = *it;
  2221. if (!std::isdigit(d)) {
  2222. return false;
  2223. }
  2224. out *= 10U;
  2225. out += d - '0';
  2226. return ParseExaclyNDigits<Digits - 1U>::Do(++it, out);
  2227. }
  2228. };
  2229. // Parse
  2230. class TParse : public TBoxedValue {
  2231. public:
  2232. class TFactory : public TBoxedValue {
  2233. public:
  2234. explicit TFactory(TSourcePosition pos)
  2235. : Pos_(pos)
  2236. {}
  2237. private:
  2238. TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try {
  2239. return TUnboxedValuePod(new TParse(args[0], Pos_));
  2240. } catch (const std::exception& e) {
  2241. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  2242. }
  2243. const TSourcePosition Pos_;
  2244. };
  2245. static const TStringRef& Name() {
  2246. static auto name = TStringRef::Of("Parse");
  2247. return name;
  2248. }
  2249. static bool DeclareSignature(
  2250. const TStringRef& name,
  2251. TType*,
  2252. IFunctionTypeInfoBuilder& builder,
  2253. bool typesOnly)
  2254. {
  2255. if (Name() != name) {
  2256. return false;
  2257. }
  2258. auto resourceType = builder.Resource(TMResourceName);
  2259. auto optionalResourceType = builder.Optional()->Item(resourceType).Build();
  2260. builder.Args()->Add<char*>().Flags(ICallablePayload::TArgumentFlags::AutoMap)
  2261. .Add(builder.Optional()->Item<ui16>())
  2262. .Done()
  2263. .OptionalArgs(1);
  2264. builder.RunConfig<char*>().Returns(optionalResourceType);
  2265. if (!typesOnly) {
  2266. builder.Implementation(new TParse::TFactory(builder.GetSourcePosition()));
  2267. }
  2268. return true;
  2269. }
  2270. private:
  2271. const TSourcePosition Pos_;
  2272. const TUnboxedValue Format_;
  2273. std::vector<std::function<bool(std::string_view::const_iterator& it, size_t, TUnboxedValuePod&, const IDateBuilder&)>> Scanners_;
  2274. struct TDataScanner {
  2275. const std::string_view Data_;
  2276. bool operator()(std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod&, const IDateBuilder&) const {
  2277. if (limit < Data_.size() || !std::equal(Data_.begin(), Data_.end(), it)) {
  2278. return false;
  2279. }
  2280. std::advance(it, Data_.size());
  2281. return true;
  2282. }
  2283. };
  2284. TUnboxedValue Run(
  2285. const IValueBuilder* valueBuilder,
  2286. const TUnboxedValuePod* args) const override
  2287. {
  2288. try {
  2289. EMPTY_RESULT_ON_EMPTY_ARG(0);
  2290. const std::string_view buffer = args[0].AsStringRef();
  2291. TUnboxedValuePod result(0);
  2292. auto& storage = Reference(result);
  2293. storage.MakeDefault();
  2294. auto& builder = valueBuilder->GetDateBuilder();
  2295. auto it = buffer.begin();
  2296. for (const auto& scanner : Scanners_) {
  2297. if (!scanner(it, std::distance(it, buffer.end()), result, builder)) {
  2298. return TUnboxedValuePod();
  2299. }
  2300. }
  2301. if (buffer.end() != it || !storage.Validate(builder)) {
  2302. return TUnboxedValuePod();
  2303. }
  2304. return result;
  2305. } catch (const std::exception& e) {
  2306. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  2307. }
  2308. }
  2309. TParse(const TUnboxedValuePod& runConfig, TSourcePosition pos)
  2310. : Pos_(pos)
  2311. , Format_(runConfig)
  2312. {
  2313. const std::string_view formatView(Format_.AsStringRef());
  2314. auto dataStart = formatView.begin();
  2315. size_t dataSize = 0U;
  2316. for (auto ptr = formatView.begin(); formatView.end() != ptr; ++ptr) {
  2317. if (*ptr != '%') {
  2318. ++dataSize;
  2319. continue;
  2320. }
  2321. if (dataSize) {
  2322. Scanners_.emplace_back(TDataScanner{std::string_view(&*dataStart, dataSize)});
  2323. dataSize = 0;
  2324. }
  2325. if (++ptr == formatView.end()) {
  2326. ythrow yexception() << "format string ends with single %%";
  2327. }
  2328. switch (*ptr) {
  2329. case '%':
  2330. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod&, const IDateBuilder&) {
  2331. return limit > 0U && *it++ == '%';
  2332. });
  2333. break;
  2334. case 'Y': {
  2335. static constexpr size_t size = 4;
  2336. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  2337. ui32 year = 0U;
  2338. if (limit < size || !ParseExaclyNDigits<size>::Do(it, year) || !ValidateYear(year)) {
  2339. return false;
  2340. }
  2341. SetYear(result, year);
  2342. return true;
  2343. });
  2344. break;
  2345. }
  2346. case 'm': {
  2347. static constexpr size_t size = 2;
  2348. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  2349. ui32 month = 0U;
  2350. if (limit < size || !ParseExaclyNDigits<size>::Do(it, month) || !ValidateMonth(month)) {
  2351. return false;
  2352. }
  2353. SetMonth(result, month);
  2354. return true;
  2355. });
  2356. break;
  2357. }
  2358. case 'd': {
  2359. static constexpr size_t size = 2;
  2360. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  2361. ui32 day = 0U;
  2362. if (limit < size || !ParseExaclyNDigits<size>::Do(it, day) || !ValidateDay(day)) {
  2363. return false;
  2364. }
  2365. SetDay(result, day);
  2366. return true;
  2367. });
  2368. break;
  2369. }
  2370. case 'H': {
  2371. static constexpr size_t size = 2;
  2372. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  2373. ui32 hour = 0U;
  2374. if (limit < size || !ParseExaclyNDigits<size>::Do(it, hour) || !ValidateHour(hour)) {
  2375. return false;
  2376. }
  2377. SetHour(result, hour);
  2378. return true;
  2379. });
  2380. break;
  2381. }
  2382. case 'M': {
  2383. static constexpr size_t size = 2;
  2384. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  2385. ui32 minute = 0U;
  2386. if (limit < size || !ParseExaclyNDigits<size>::Do(it, minute) || !ValidateMinute(minute)) {
  2387. return false;
  2388. }
  2389. SetMinute(result, minute);
  2390. return true;
  2391. });
  2392. break;
  2393. }
  2394. case 'S': {
  2395. static constexpr size_t size = 2;
  2396. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  2397. ui32 second = 0U;
  2398. if (limit < size || !ParseExaclyNDigits<size>::Do(it, second) || !ValidateSecond(second)) {
  2399. return false;
  2400. }
  2401. SetSecond(result, second);
  2402. limit -= size;
  2403. if (!limit || *it != '.') {
  2404. return true;
  2405. }
  2406. ++it;
  2407. --limit;
  2408. ui32 usec = 0U;
  2409. size_t digits = 6U;
  2410. for (; limit; --limit) {
  2411. const auto c = *it;
  2412. if (!digits || !std::isdigit(c)) {
  2413. break;
  2414. }
  2415. usec *= 10U;
  2416. usec += c - '0';
  2417. ++it;
  2418. --digits;
  2419. }
  2420. for (; !digits && limit && std::isdigit(*it); --limit, ++it);
  2421. while (digits--) {
  2422. usec *= 10U;
  2423. }
  2424. SetMicrosecond(result, usec);
  2425. return true;
  2426. });
  2427. break;
  2428. }
  2429. case 'Z':
  2430. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder& builder) {
  2431. const auto start = it;
  2432. while (limit > 0 && (std::isalnum(*it) || *it == '/' || *it == '_' || *it == '-' || *it == '+')) {
  2433. ++it;
  2434. --limit;
  2435. }
  2436. const auto size = std::distance(start, it);
  2437. ui32 timezoneId;
  2438. if (!builder.FindTimezoneId(TStringRef(&*start, size), timezoneId)) {
  2439. return false;
  2440. }
  2441. SetTimezoneId(result, timezoneId);
  2442. return true;
  2443. });
  2444. break;
  2445. case 'b': {
  2446. static constexpr size_t size = 3;
  2447. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  2448. const auto start = it;
  2449. size_t cnt = 0U;
  2450. while (limit > 0 && cnt < size && std::isalpha(*it)) {
  2451. ++it;
  2452. ++cnt;
  2453. --limit;
  2454. }
  2455. const std::string_view monthName{start, cnt};
  2456. ui8 month = 0U;
  2457. if (cnt < size || !ValidateMonthShortName(monthName, month)) {
  2458. return false;
  2459. }
  2460. SetMonth(result, month);
  2461. return true;
  2462. });
  2463. break;
  2464. }
  2465. case 'B': {
  2466. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  2467. const auto start = it;
  2468. size_t cnt = 0U;
  2469. while (limit > 0 && std::isalpha(*it)) {
  2470. ++it;
  2471. ++cnt;
  2472. --limit;
  2473. }
  2474. const std::string_view monthName{start, cnt};
  2475. ui8 month = 0U;
  2476. if (!ValidateMonthFullName(monthName, month)) {
  2477. return false;
  2478. }
  2479. SetMonth(result, month);
  2480. return true;
  2481. });
  2482. break;
  2483. }
  2484. default:
  2485. ythrow yexception() << "invalid format character: " << *ptr;
  2486. }
  2487. dataStart = ptr + 1U;
  2488. }
  2489. if (dataSize) {
  2490. Scanners_.emplace_back(TDataScanner{std::string_view(&*dataStart, dataSize)});
  2491. }
  2492. }
  2493. };
  2494. #define PARSE_SPECIFIC_FORMAT(format) \
  2495. SIMPLE_STRICT_UDF(TParse##format, TOptional<TResource<TMResourceName>>(TAutoMap<char*>)) { \
  2496. auto str = args[0].AsStringRef(); \
  2497. TInstant instant; \
  2498. if (!TInstant::TryParse##format(TStringBuf(str.Data(), str.Size()), instant) || instant.Seconds() >= NUdf::MAX_DATETIME) { \
  2499. return TUnboxedValuePod(); \
  2500. } \
  2501. auto& builder = valueBuilder->GetDateBuilder(); \
  2502. TUnboxedValuePod result(0); \
  2503. auto& storage = Reference(result); \
  2504. storage.FromTimestamp(builder, instant.MicroSeconds()); \
  2505. return result; \
  2506. }
  2507. PARSE_SPECIFIC_FORMAT(Rfc822);
  2508. PARSE_SPECIFIC_FORMAT(Iso8601);
  2509. PARSE_SPECIFIC_FORMAT(Http);
  2510. PARSE_SPECIFIC_FORMAT(X509);
  // Definition of TDateTime2Module: the list of UDF types the module consists of.
  // NOTE(review): the order of the entries may be observable by users of the
  // module - confirm before reordering.
  2511. SIMPLE_MODULE(TDateTime2Module,
  // Split, instantiated for every listed date/time data type.
  2512. TUserDataTypeFuncFactory<true, true, SplitUDF, TSplit,
  2513. TDate,
  2514. TDatetime,
  2515. TTimestamp,
  2516. TTzDate,
  2517. TTzDatetime,
  2518. TTzTimestamp,
  2519. TDate32,
  2520. TDatetime64,
  2521. TTimestamp64>,
  // Make* functions and Convert.
  2522. TMakeDate,
  2523. TMakeDatetime,
  2524. TMakeTimestamp,
  2525. TMakeTzDate,
  2526. TMakeTzDatetime,
  2527. TMakeTzTimestamp,
  2528. TConvert,
  2529. TMakeDate32,
  2530. TMakeDatetime64,
  2531. TMakeTimestamp64,
  // Get* component accessors; each one is given an implementation for
  // TMResourceName and one for TM64ResourceName.
  2532. TGetDateComponent<GetYearUDF, ui16, GetYear<TMResourceName>, i32, GetYear<TM64ResourceName>>,
  2533. TGetDateComponent<GetDayOfYearUDF, ui16, GetDayOfYear<TMResourceName>, ui16, GetDayOfYear<TM64ResourceName>>,
  2534. TGetDateComponent<GetMonthUDF, ui8, GetMonth<TMResourceName>, ui8, GetMonth<TM64ResourceName>>,
  2535. TGetDateComponentName<GetMonthNameUDF, GetMonthName<TMResourceName>, GetMonthName<TM64ResourceName>>,
  2536. TGetDateComponent<GetWeekOfYearUDF, ui8, GetWeekOfYear<TMResourceName>, ui8, GetWeekOfYear<TM64ResourceName>>,
  2537. TGetDateComponent<GetWeekOfYearIso8601UDF, ui8, GetWeekOfYearIso8601<TMResourceName>, ui8, GetWeekOfYearIso8601<TM64ResourceName>>,
  2538. TGetDateComponent<GetDayOfMonthUDF, ui8, GetDay<TMResourceName>, ui8, GetDay<TM64ResourceName>>,
  2539. TGetDateComponent<GetDayOfWeekUDF, ui8, GetDayOfWeek<TMResourceName>, ui8, GetDayOfWeek<TM64ResourceName>>,
  2540. TGetDateComponentName<GetDayOfWeekNameUDF, GetDayOfWeekName<TMResourceName>, GetDayOfWeekName<TM64ResourceName>>,
  2541. TGetTimeComponent<GetHourUDF, ui8, GetHour<TMResourceName>, GetHour<TM64ResourceName>, 1u, 3600u, 24u, false>,
  2542. TGetTimeComponent<GetMinuteUDF, ui8, GetMinute<TMResourceName>, GetMinute<TM64ResourceName>, 1u, 60u, 60u, false>,
  2543. TGetTimeComponent<GetSecondUDF, ui8, GetSecond<TMResourceName>, GetSecond<TM64ResourceName>, 1u, 1u, 60u, false>,
  // Both sub-second getters are built on GetMicrosecond and differ only in
  // the numeric template arguments.
  2544. TGetTimeComponent<GetMillisecondOfSecondUDF, ui32, GetMicrosecond<TMResourceName>, GetMicrosecond<TM64ResourceName>, 1000u, 1000u, 1000u, true>,
  2545. TGetTimeComponent<GetMicrosecondOfSecondUDF, ui32, GetMicrosecond<TMResourceName>, GetMicrosecond<TM64ResourceName>, 1u, 1u, 1000000u, true>,
  2546. TGetDateComponent<GetTimezoneIdUDF, ui16, GetTimezoneId<TMResourceName>, ui16, GetTimezoneId<TM64ResourceName>>,
  2547. TGetDateComponentName<GetTimezoneNameUDF, GetTimezoneName<TMResourceName>, GetTimezoneName<TM64ResourceName>>,
  // Update and the From* functions.
  2548. TUpdate,
  2549. TFromSeconds,
  2550. TFromMilliseconds,
  2551. TFromMicroseconds,
  2552. TFromSeconds64,
  2553. TFromMilliseconds64,
  2554. TFromMicroseconds64,
  // IntervalFrom* and Interval64From* functions.
  2555. TIntervalFromDays,
  2556. TIntervalFromHours,
  2557. TIntervalFromMinutes,
  2558. TIntervalFromSeconds,
  2559. TIntervalFromMilliseconds,
  2560. TIntervalFromMicroseconds,
  2561. TInterval64FromDays,
  2562. TInterval64FromHours,
  2563. TInterval64FromMinutes,
  2564. TInterval64FromSeconds,
  2565. TInterval64FromMilliseconds,
  2566. TInterval64FromMicroseconds,
  // ToDays / ToHours / ToMinutes.
  2567. TToDays,
  2568. TToHours,
  2569. TToMinutes,
  // StartOf* boundaries, each with a TTMStorage and a TTM64Storage variant.
  2570. TBoundaryOf<StartOfYearUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfYear<TTMStorage>>,
  2571. SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfYear<TTM64Storage>>>,
  2572. TBoundaryOf<StartOfQuarterUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfQuarter<TTMStorage>>,
  2573. SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfQuarter<TTM64Storage>>>,
  2574. TBoundaryOf<StartOfMonthUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfMonth<TTMStorage>>,
  2575. SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfMonth<TTM64Storage>>>,
  2576. TBoundaryOf<StartOfWeekUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfWeek<TTMStorage>>,
  2577. SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfWeek<TTM64Storage>>>,
  2578. TBoundaryOf<StartOfDayUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfDay<TTMStorage>>,
  2579. SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfDay<TTM64Storage>>>,
  2580. TStartOf,
  2581. TTimeOfDay,
  // Shift* functions for years, quarters and months.
  2582. TShift<ShiftYearsUDF, DoAddYears<TMResourceName>, DoAddYears<TM64ResourceName>>,
  2583. TShift<ShiftQuartersUDF, DoAddQuarters<TMResourceName>, DoAddQuarters<TM64ResourceName>>,
  2584. TShift<ShiftMonthsUDF, DoAddMonths<TMResourceName>, DoAddMonths<TM64ResourceName>>,
  // EndOf* boundaries, mirroring the StartOf* entries above.
  2585. TBoundaryOf<EndOfYearUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfYear<TTMStorage>>,
  2586. SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfYear<TTM64Storage>>>,
  2587. TBoundaryOf<EndOfQuarterUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfQuarter<TTMStorage>>,
  2588. SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfQuarter<TTM64Storage>>>,
  2589. TBoundaryOf<EndOfMonthUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfMonth<TTMStorage>>,
  2590. SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfMonth<TTM64Storage>>>,
  2591. TBoundaryOf<EndOfWeekUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfWeek<TTMStorage>>,
  2592. SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfWeek<TTM64Storage>>>,
  2593. TBoundaryOf<EndOfDayUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfDay<TTMStorage>>,
  2594. SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfDay<TTM64Storage>>>,
  2595. TEndOf,
  // ToSeconds / ToMilliseconds / ToMicroseconds.
  2596. TToUnits<ToSecondsUDF, ui32, 1>,
  2597. TToUnits<ToMillisecondsUDF, ui64, 1000>,
  2598. TToUnits<ToMicrosecondsUDF, ui64, 1000000>,
  // Formatting and parsing; the last four parsers are generated by
  // PARSE_SPECIFIC_FORMAT.
  2599. TFormat,
  2600. TParse,
  2601. TParseRfc822,
  2602. TParseIso8601,
  2603. TParseHttp,
  2604. TParseX509
  2605. )
  2606. }
  // Passes TDateTime2Module to REGISTER_MODULES (macro defined outside this
  // file; presumably it exposes the module to the UDF host - confirm).
  2607. REGISTER_MODULES(TDateTime2Module)