datetime_udf.cpp 96 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440
  1. #include <yql/essentials/minikql/mkql_type_ops.h>
  2. #include <yql/essentials/public/udf/tz/udf_tz.h>
  3. #include <yql/essentials/public/udf/udf_helpers.h>
  4. #include <yql/essentials/minikql/datetime/datetime.h>
  5. #include <yql/essentials/minikql/datetime/datetime64.h>
  6. #include <yql/essentials/public/udf/arrow/udf_arrow_helpers.h>
  7. #include <util/datetime/base.h>
  8. using namespace NKikimr;
  9. using namespace NUdf;
  10. using namespace NYql::DateTime;
  11. extern const char SplitName[] = "Split";
  12. extern const char ToSecondsName[] = "ToSeconds";
  13. extern const char ToMillisecondsName[] = "ToMilliseconds";
  14. extern const char ToMicrosecondsName[] = "ToMicroseconds";
  15. extern const char GetHourName[] = "GetHour";
  16. extern const char GetMinuteName[] = "GetMinute";
  17. extern const char GetSecondName[] = "GetSecond";
  18. extern const char GetMillisecondOfSecondName[] = "GetMillisecondOfSecond";
  19. extern const char GetMicrosecondOfSecondName[] = "GetMicrosecondOfSecond";
  20. extern const char TMResourceName[] = "DateTime2.TM";
  21. extern const char TM64ResourceName[] = "DateTime2.TM64";
  22. const auto UsecondsInDay = 86400000000ll;
  23. const auto UsecondsInHour = 3600000000ll;
  24. const auto UsecondsInMinute = 60000000ll;
  25. const auto UsecondsInSecond = 1000000ll;
  26. const auto UsecondsInMilliseconds = 1000ll;
  27. template <const char* TFuncName, typename TResult, ui32 ScaleAfterSeconds>
  28. class TToUnits {
  29. public:
  30. typedef bool TTypeAwareMarker;
  31. using TSignedResult = typename std::make_signed<TResult>::type;
  32. static TResult DateCore(ui16 value) {
  33. return value * ui32(86400) * TResult(ScaleAfterSeconds);
  34. }
  35. template<typename TTzDate>
  36. static TResult TzBlockCore(TBlockItem tzDate);
  37. template<>
  38. static TResult TzBlockCore<TTzDate>(TBlockItem tzDate) {
  39. return DateCore(tzDate.Get<ui16>());
  40. }
  41. template<>
  42. static TResult TzBlockCore<TTzDatetime>(TBlockItem tzDate) {
  43. return DatetimeCore(tzDate.Get<ui32>());
  44. }
  45. template<>
  46. static TResult TzBlockCore<TTzTimestamp>(TBlockItem tzDate) {
  47. return TimestampCore(tzDate.Get<ui64>());
  48. }
  49. static TResult DatetimeCore(ui32 value) {
  50. return value * TResult(ScaleAfterSeconds);
  51. }
  52. static TResult TimestampCore(ui64 value) {
  53. return TResult(value / (1000000u / ScaleAfterSeconds));
  54. }
  55. static TSignedResult IntervalCore(i64 value) {
  56. return TSignedResult(value / (1000000u / ScaleAfterSeconds));
  57. }
  58. static const TStringRef& Name() {
  59. static auto name = TStringRef(TFuncName, std::strlen(TFuncName));
  60. return name;
  61. }
  62. template<typename TTzDate, typename TOutput>
  63. static auto MakeTzBlockExec() {
  64. using TReader = TTzDateBlockReader<TTzDate, /*Nullable*/ false>;
  65. return UnaryPreallocatedReaderExecImpl<TReader, TOutput, TzBlockCore<TTzDate>>;
  66. }
  67. static bool DeclareSignature(
  68. const TStringRef& name,
  69. TType* userType,
  70. IFunctionTypeInfoBuilder& builder,
  71. bool typesOnly)
  72. {
  73. if (Name() != name) {
  74. return false;
  75. }
  76. try {
  77. auto typeInfoHelper = builder.TypeInfoHelper();
  78. TTupleTypeInspector tuple(*typeInfoHelper, userType);
  79. Y_ENSURE(tuple);
  80. Y_ENSURE(tuple.GetElementsCount() > 0);
  81. TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0));
  82. Y_ENSURE(argsTuple);
  83. if (argsTuple.GetElementsCount() != 1) {
  84. builder.SetError("Expected one argument");
  85. return true;
  86. }
  87. auto argType = argsTuple.GetElementType(0);
  88. TVector<const TType*> argBlockTypes;
  89. argBlockTypes.push_back(argType);
  90. TBlockTypeInspector block(*typeInfoHelper, argType);
  91. if (block) {
  92. Y_ENSURE(!block.IsScalar());
  93. argType = block.GetItemType();
  94. }
  95. bool isOptional = false;
  96. if (auto opt = TOptionalTypeInspector(*typeInfoHelper, argType)) {
  97. argType = opt.GetItemType();
  98. isOptional = true;
  99. }
  100. TDataTypeInspector data(*typeInfoHelper, argType);
  101. if (!data) {
  102. builder.SetError("Expected data type");
  103. return true;
  104. }
  105. auto typeId = data.GetTypeId();
  106. if (!(typeId == TDataType<TDate>::Id || typeId == TDataType<TTzDate>::Id ||
  107. typeId == TDataType<TDatetime>::Id || typeId == TDataType<TTzDatetime>::Id ||
  108. typeId == TDataType<TTimestamp>::Id || typeId == TDataType<TTzTimestamp>::Id ||
  109. typeId == TDataType<TInterval>::Id)) {
  110. builder.SetError(TStringBuilder() << "Type " << GetDataTypeInfo(GetDataSlot(typeId)).Name << " is not supported");
  111. }
  112. builder.Args()->Add(argsTuple.GetElementType(0)).Done();
  113. const TType* retType;
  114. if (typeId != TDataType<TInterval>::Id) {
  115. retType = builder.SimpleType<TResult>();
  116. } else {
  117. retType = builder.SimpleType<TSignedResult>();
  118. }
  119. if (isOptional) {
  120. retType = builder.Optional()->Item(retType).Build();
  121. }
  122. auto outputType = retType;
  123. if (block) {
  124. retType = builder.Block(block.IsScalar())->Item(retType).Build();
  125. }
  126. builder.Returns(retType);
  127. builder.SupportsBlocks();
  128. builder.IsStrict();
  129. builder.UserType(userType);
  130. if (!typesOnly) {
  131. if (typeId == TDataType<TDate>::Id || typeId == TDataType<TTzDate>::Id) {
  132. if (block) {
  133. const auto exec = (typeId == TDataType<TTzDate>::Id)
  134. ? MakeTzBlockExec<TTzDate, TResult>()
  135. : UnaryPreallocatedExecImpl<ui16, TResult, DateCore>;
  136. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  137. exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  138. } else {
  139. builder.Implementation(new TUnaryOverOptionalImpl<ui16, TResult, DateCore>());
  140. }
  141. }
  142. if (typeId == TDataType<TDatetime>::Id || typeId == TDataType<TTzDatetime>::Id) {
  143. if (block) {
  144. const auto exec = (typeId == TDataType<TTzDatetime>::Id)
  145. ? MakeTzBlockExec<TTzDatetime, TResult>()
  146. : UnaryPreallocatedExecImpl<ui32, TResult, DatetimeCore>;
  147. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  148. exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  149. } else {
  150. builder.Implementation(new TUnaryOverOptionalImpl<ui32, TResult, DatetimeCore>());
  151. }
  152. }
  153. if (typeId == TDataType<TTimestamp>::Id || typeId == TDataType<TTzTimestamp>::Id) {
  154. if (block) {
  155. const auto exec = (typeId == TDataType<TTzTimestamp>::Id)
  156. ? MakeTzBlockExec<TTzTimestamp, TResult>()
  157. : UnaryPreallocatedExecImpl<ui64, TResult, TimestampCore>;
  158. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  159. exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  160. } else {
  161. builder.Implementation(new TUnaryOverOptionalImpl<ui64, TResult, TimestampCore>());
  162. }
  163. }
  164. if (typeId == TDataType<TInterval>::Id) {
  165. if (block) {
  166. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  167. UnaryPreallocatedExecImpl<i64, TSignedResult, IntervalCore>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  168. } else {
  169. builder.Implementation(new TUnaryOverOptionalImpl<i64, TSignedResult, IntervalCore>());
  170. }
  171. }
  172. }
  173. } catch (const std::exception& e) {
  174. builder.SetError(TStringBuf(e.what()));
  175. }
  176. return true;
  177. }
  178. };
  179. template <const char* TFuncName, typename TFieldStorage, TFieldStorage (*FieldFunc)(const TUnboxedValuePod&), ui32 Divisor, ui32 Scale, ui32 Limit, bool Fractional>
  180. struct TGetTimeComponent {
  181. typedef bool TTypeAwareMarker;
  182. template <typename TInput, bool AlwaysZero, bool InputFractional>
  183. static TFieldStorage Core(TInput val) {
  184. if constexpr (AlwaysZero) {
  185. return 0;
  186. }
  187. if constexpr (InputFractional) {
  188. if constexpr (Fractional) {
  189. return (val / Scale) % Limit;
  190. } else {
  191. return (val / 1000000u / Scale) % Limit;
  192. }
  193. } else {
  194. if constexpr (Fractional) {
  195. return 0;
  196. } else {
  197. return (val / Scale) % Limit;
  198. }
  199. }
  200. }
  201. class TImpl : public TBoxedValue {
  202. public:
  203. TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final {
  204. Y_UNUSED(valueBuilder);
  205. if (!args[0]) {
  206. return {};
  207. }
  208. return TUnboxedValuePod(TFieldStorage((FieldFunc(args[0])) / Divisor));
  209. }
  210. };
  211. static const TStringRef& Name() {
  212. static auto name = TStringRef(TFuncName, std::strlen(TFuncName));
  213. return name;
  214. }
  215. static bool DeclareSignature(
  216. const TStringRef& name,
  217. TType* userType,
  218. IFunctionTypeInfoBuilder& builder,
  219. bool typesOnly)
  220. {
  221. if (Name() != name) {
  222. return false;
  223. }
  224. try {
  225. auto typeInfoHelper = builder.TypeInfoHelper();
  226. TTupleTypeInspector tuple(*typeInfoHelper, userType);
  227. if (tuple) {
  228. Y_ENSURE(tuple.GetElementsCount() > 0);
  229. TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0));
  230. Y_ENSURE(argsTuple);
  231. if (argsTuple.GetElementsCount() != 1) {
  232. builder.SetError("Expected one argument");
  233. return true;
  234. }
  235. auto argType = argsTuple.GetElementType(0);
  236. TVector<const TType*> argBlockTypes;
  237. argBlockTypes.push_back(argType);
  238. TBlockTypeInspector block(*typeInfoHelper, argType);
  239. if (block) {
  240. Y_ENSURE(!block.IsScalar());
  241. argType = block.GetItemType();
  242. }
  243. bool isOptional = false;
  244. if (auto opt = TOptionalTypeInspector(*typeInfoHelper, argType)) {
  245. argType = opt.GetItemType();
  246. isOptional = true;
  247. }
  248. TResourceTypeInspector res(*typeInfoHelper, argType);
  249. if (!res) {
  250. TDataTypeInspector data(*typeInfoHelper, argType);
  251. if (!data) {
  252. builder.SetError("Expected data type");
  253. return true;
  254. }
  255. auto typeId = data.GetTypeId();
  256. if (typeId == TDataType<TDate>::Id ||
  257. typeId == TDataType<TDatetime>::Id ||
  258. typeId == TDataType<TTimestamp>::Id) {
  259. builder.Args()->Add(argsTuple.GetElementType(0)).Done();
  260. const TType* retType = builder.SimpleType<TFieldStorage>();
  261. if (isOptional) {
  262. retType = builder.Optional()->Item(retType).Build();
  263. }
  264. auto outputType = retType;
  265. if (block) {
  266. retType = builder.Block(block.IsScalar())->Item(retType).Build();
  267. }
  268. builder.Returns(retType);
  269. builder.SupportsBlocks();
  270. builder.IsStrict();
  271. builder.UserType(userType);
  272. if (!typesOnly) {
  273. if (typeId == TDataType<TDate>::Id) {
  274. if (block) {
  275. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  276. UnaryPreallocatedExecImpl<ui16, TFieldStorage, Core<ui16, true, false>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  277. } else {
  278. builder.Implementation(new TUnaryOverOptionalImpl<ui16, TFieldStorage, Core<ui16, true, false>>());
  279. }
  280. }
  281. if (typeId == TDataType<TDatetime>::Id) {
  282. if (block) {
  283. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  284. UnaryPreallocatedExecImpl<ui32, TFieldStorage, Core<ui32, false, false>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  285. } else {
  286. builder.Implementation(new TUnaryOverOptionalImpl<ui32, TFieldStorage, Core<ui32, false, false>>());
  287. }
  288. }
  289. if (typeId == TDataType<TTimestamp>::Id) {
  290. if (block) {
  291. builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
  292. UnaryPreallocatedExecImpl<ui64, TFieldStorage, Core<ui64, false, true>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
  293. } else {
  294. builder.Implementation(new TUnaryOverOptionalImpl<ui64, TFieldStorage, Core<ui64, false, true>>());
  295. }
  296. }
  297. }
  298. return true;
  299. }
  300. } else {
  301. Y_ENSURE(!block);
  302. if (res.GetTag() != TStringRef::Of(TMResourceName)) {
  303. builder.SetError("Unexpected resource tag");
  304. return true;
  305. }
  306. }
  307. }
  308. // default implementation
  309. builder.Args()->Add<TResource<TMResourceName>>().Flags(ICallablePayload::TArgumentFlags::AutoMap).Done();
  310. builder.Returns<TFieldStorage>();
  311. builder.IsStrict();
  312. if (!typesOnly) {
  313. builder.Implementation(new TImpl());
  314. }
  315. } catch (const std::exception& e) {
  316. builder.SetError(TStringBuf(e.what()));
  317. }
  318. return true;
  319. }
  320. };
  321. namespace {
  322. const TTMStorage& Reference(const NUdf::TUnboxedValuePod& value) {
  323. return *reinterpret_cast<const TTMStorage*>(value.GetRawPtr());
  324. }
  325. TTMStorage& Reference(NUdf::TUnboxedValuePod& value) {
  326. return *reinterpret_cast<TTMStorage*>(value.GetRawPtr());
  327. }
  328. const TTMStorage& Reference(const TBlockItem& value) {
  329. return *reinterpret_cast<const TTMStorage*>(value.GetRawPtr());
  330. }
  331. Y_DECLARE_UNUSED TTMStorage& Reference(TBlockItem& value) {
  332. return *reinterpret_cast<TTMStorage*>(value.GetRawPtr());
  333. }
  334. const TTM64Storage& Reference64(const NUdf::TUnboxedValuePod& value) {
  335. return *reinterpret_cast<const TTM64Storage*>(value.GetRawPtr());
  336. }
  337. TTM64Storage& Reference64(NUdf::TUnboxedValuePod& value) {
  338. return *reinterpret_cast<TTM64Storage*>(value.GetRawPtr());
  339. }
  340. template<typename TValue>
  341. TValue DoAddMonths(const TValue& date, i64 months, const NUdf::IDateBuilder& builder) {
  342. auto result = date;
  343. auto& storage = Reference(result);
  344. if (!NYql::DateTime::DoAddMonths(storage, months, builder)) {
  345. return TValue{};
  346. }
  347. return result;
  348. }
  349. template<typename TValue>
  350. TValue DoAddQuarters(const TValue& date, i64 quarters, const NUdf::IDateBuilder& builder) {
  351. return DoAddMonths(date, quarters * 3ll, builder);
  352. }
  353. template<typename TValue>
  354. TValue DoAddYears(const TValue& date, i64 years, const NUdf::IDateBuilder& builder) {
  355. auto result = date;
  356. auto& storage = Reference(result);
  357. if (!NYql::DateTime::DoAddYears(storage, years, builder)) {
  358. return TValue{};
  359. }
  360. return result;
  361. }
  362. #define ACCESSORS(field, type) \
  363. template<typename TValue> \
  364. inline type Get##field(const TValue& tm) { \
  365. return (type)Reference(tm).field; \
  366. } \
  367. template<typename TValue> \
  368. Y_DECLARE_UNUSED inline void Set##field(TValue& tm, type value) { \
  369. Reference(tm).field = value; \
  370. }
  371. ACCESSORS(Year, ui16)
  372. ACCESSORS(DayOfYear, ui16)
  373. ACCESSORS(WeekOfYear, ui8)
  374. ACCESSORS(WeekOfYearIso8601, ui8)
  375. ACCESSORS(DayOfWeek, ui8)
  376. ACCESSORS(Month, ui8)
  377. ACCESSORS(Day, ui8)
  378. ACCESSORS(Hour, ui8)
  379. ACCESSORS(Minute, ui8)
  380. ACCESSORS(Second, ui8)
  381. ACCESSORS(Microsecond, ui32)
  382. ACCESSORS(TimezoneId, ui16)
  383. #undef ACCESSORS
  384. inline bool ValidateYear(ui16 year) {
  385. return year >= NUdf::MIN_YEAR - 1 || year <= NUdf::MAX_YEAR + 1;
  386. }
  387. inline bool ValidateMonth(ui8 month) {
  388. return month >= 1 && month <= 12;
  389. }
  390. inline bool ValidateDay(ui8 day) {
  391. return day >= 1 && day <= 31;
  392. }
  393. inline bool ValidateHour(ui8 hour) {
  394. return hour < 24;
  395. }
  396. inline bool ValidateMinute(ui8 minute) {
  397. return minute < 60;
  398. }
  399. inline bool ValidateSecond(ui8 second) {
  400. return second < 60;
  401. }
  402. inline bool ValidateMicrosecond(ui32 microsecond) {
  403. return microsecond < 1000000;
  404. }
  405. inline bool ValidateTimezoneId(ui16 timezoneId) {
  406. const auto& zones = NUdf::GetTimezones();
  407. return timezoneId < zones.size() && !zones[timezoneId].empty();
  408. }
  409. inline bool ValidateMonthShortName(const std::string_view& monthName, ui8& month) {
  410. static constexpr auto cmp = [](const std::string_view& a, const std::string_view& b) {
  411. int cmp = strnicmp(a.data(), b.data(), std::min(a.size(), b.size()));
  412. if (cmp == 0)
  413. return a.size() < b.size();
  414. return cmp < 0;
  415. };
  416. static const std::map<std::string_view, ui8, decltype(cmp)> mp = {
  417. {"jan", 1},
  418. {"feb", 2},
  419. {"mar", 3},
  420. {"apr", 4},
  421. {"may", 5},
  422. {"jun", 6},
  423. {"jul", 7},
  424. {"aug", 8},
  425. {"sep", 9},
  426. {"oct", 10},
  427. {"nov", 11},
  428. {"dec", 12}
  429. };
  430. const auto& it = mp.find(monthName);
  431. if (it != mp.end()) {
  432. month = it -> second;
  433. return true;
  434. }
  435. return false;
  436. }
  437. inline bool ValidateMonthFullName(const std::string_view& monthName, ui8& month) {
  438. static constexpr auto cmp = [](const std::string_view& a, const std::string_view& b) {
  439. int cmp = strnicmp(a.data(), b.data(), std::min(a.size(), b.size()));
  440. if (cmp == 0)
  441. return a.size() < b.size();
  442. return cmp < 0;
  443. };
  444. static const std::map<std::string_view, ui8, decltype(cmp)> mp = {
  445. {"january", 1},
  446. {"february", 2},
  447. {"march", 3},
  448. {"april", 4},
  449. {"may", 5},
  450. {"june", 6},
  451. {"july", 7},
  452. {"august", 8},
  453. {"september", 9},
  454. {"october", 10},
  455. {"november", 11},
  456. {"december", 12}
  457. };
  458. const auto& it = mp.find(monthName);
  459. if (it != mp.end()) {
  460. month = it -> second;
  461. return true;
  462. }
  463. return false;
  464. }
  465. inline bool ValidateDatetime(ui32 datetime) {
  466. return datetime < MAX_DATETIME;
  467. }
  468. inline bool ValidateTimestamp(ui64 timestamp) {
  469. return timestamp < MAX_TIMESTAMP;
  470. }
  471. inline bool ValidateInterval(i64 interval) {
  472. return interval > -i64(MAX_TIMESTAMP) && interval < i64(MAX_TIMESTAMP);
  473. }
  474. // Split
  475. template<typename TUserDataType, bool Nullable>
  476. using TSplitArgReader = std::conditional_t<TTzDataType<TUserDataType>::Result,
  477. TTzDateBlockReader<TUserDataType, Nullable>,
  478. TFixedSizeBlockReader<typename TDataType<TUserDataType>::TLayout, Nullable>>;
  479. template<typename TUserDataType>
  480. struct TSplitKernelExec : TUnaryKernelExec<TSplitKernelExec<TUserDataType>, TSplitArgReader<TUserDataType, false>, TResourceArrayBuilder<false>> {
  481. static void Split(TBlockItem arg, TTMStorage& storage, const IValueBuilder& valueBuilder);
  482. template<typename TSink>
  483. static void Process(const IValueBuilder* valueBuilder, TBlockItem arg, const TSink& sink) {
  484. try {
  485. TBlockItem res {0};
  486. Split(arg, Reference(res), *valueBuilder);
  487. sink(res);
  488. } catch (const std::exception& e) {
  489. UdfTerminate((TStringBuilder() << e.what()).data());
  490. }
  491. }
  492. };
  493. template <typename TUserDataType>
  494. class TSplit : public TBoxedValue {
  495. const TSourcePosition Pos_;
  496. public:
  497. explicit TSplit(TSourcePosition pos)
  498. : Pos_(pos)
  499. {}
  500. TUnboxedValue Run(
  501. const IValueBuilder* valueBuilder,
  502. const TUnboxedValuePod* args) const override;
  503. static bool DeclareSignature(
  504. TStringRef name,
  505. TType* userType,
  506. IFunctionTypeInfoBuilder& builder,
  507. bool typesOnly)
  508. {
  509. const auto typeInfoHelper = builder.TypeInfoHelper();
  510. TTupleTypeInspector tuple(*typeInfoHelper, userType);
  511. Y_ENSURE(tuple);
  512. Y_ENSURE(tuple.GetElementsCount() > 0);
  513. TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0));
  514. Y_ENSURE(argsTuple);
  515. if (argsTuple.GetElementsCount() != 1) {
  516. builder.SetError("Expected one argument");
  517. return true;
  518. }
  519. auto argType = argsTuple.GetElementType(0);
  520. builder.UserType(userType);
  521. builder.SupportsBlocks();
  522. builder.IsStrict();
  523. TBlockTypeInspector block(*typeInfoHelper, argType);
  524. if (block) {
  525. const auto* blockArgType = builder.Block(false)->Item<TUserDataType>().Build();
  526. builder.Args()->Add(blockArgType).Flags(ICallablePayload::TArgumentFlags::AutoMap);
  527. const auto* retType = builder.Resource(TMResourceName);
  528. const auto* blockRetType = builder.Block(false)->Item(retType).Build();
  529. builder.Returns(blockRetType);
  530. if (!typesOnly) {
  531. builder.Implementation(new TSimpleArrowUdfImpl({blockArgType}, retType, block.IsScalar(),
  532. TSplitKernelExec<TUserDataType>::Do, builder, TString(name), arrow::compute::NullHandling::COMPUTED_NO_PREALLOCATE));
  533. }
  534. } else {
  535. builder.Args()->Add<TUserDataType>().Flags(ICallablePayload::TArgumentFlags::AutoMap);
  536. if constexpr (NUdf::TDataType<TUserDataType>::Features & NYql::NUdf::BigDateType) {
  537. builder.Returns(builder.Resource(TM64ResourceName));
  538. } else {
  539. builder.Returns(builder.Resource(TMResourceName));
  540. }
  541. if (!typesOnly) {
  542. builder.Implementation(new TSplit<TUserDataType>(builder.GetSourcePosition()));
  543. }
  544. }
  545. return true;
  546. }
  547. };
  548. template <>
  549. void TSplitKernelExec<TDate>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
  550. storage.FromDate(builder.GetDateBuilder(), arg.Get<ui16>());
  551. }
  552. template <>
  553. void TSplitKernelExec<TDatetime>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
  554. storage.FromDatetime(builder.GetDateBuilder(), arg.Get<ui32>());
  555. }
  556. template <>
  557. void TSplitKernelExec<TTimestamp>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
  558. storage.FromTimestamp(builder.GetDateBuilder(), arg.Get<ui64>());
  559. }
  560. template <>
  561. void TSplitKernelExec<TTzDate>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
  562. storage.FromDate(builder.GetDateBuilder(), arg.Get<ui16>(), arg.GetTimezoneId());
  563. }
  564. template <>
  565. void TSplitKernelExec<TTzDatetime>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
  566. storage.FromDatetime(builder.GetDateBuilder(), arg.Get<ui32>(), arg.GetTimezoneId());
  567. }
  568. template <>
  569. void TSplitKernelExec<TTzTimestamp>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
  570. storage.FromTimestamp(builder.GetDateBuilder(), arg.Get<ui64>(), arg.GetTimezoneId());
  571. }
  572. template <>
  573. void TSplitKernelExec<TDate32>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) {
  574. ythrow yexception() << "Not implemented";
  575. }
  576. template <>
  577. void TSplitKernelExec<TDatetime64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) {
  578. ythrow yexception() << "Not implemented";
  579. }
  580. template <>
  581. void TSplitKernelExec<TTimestamp64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) {
  582. ythrow yexception() << "Not implemented";
  583. }
  584. template <>
  585. TUnboxedValue TSplit<TDate>::Run(
  586. const IValueBuilder* valueBuilder,
  587. const TUnboxedValuePod* args) const
  588. {
  589. try {
  590. EMPTY_RESULT_ON_EMPTY_ARG(0);
  591. auto& builder = valueBuilder->GetDateBuilder();
  592. TUnboxedValuePod result(0);
  593. auto& storage = Reference(result);
  594. storage.FromDate(builder, args[0].Get<ui16>());
  595. return result;
  596. } catch (const std::exception& e) {
  597. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  598. }
  599. }
  600. template <>
  601. TUnboxedValue TSplit<TDate32>::Run(
  602. const IValueBuilder* valueBuilder,
  603. const TUnboxedValuePod* args) const
  604. {
  605. try {
  606. EMPTY_RESULT_ON_EMPTY_ARG(0);
  607. TUnboxedValuePod result(0);
  608. auto& storage = Reference64(result);
  609. storage.FromDate32(valueBuilder->GetDateBuilder(), args[0].Get<i32>());
  610. return result;
  611. } catch (const std::exception& e) {
  612. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  613. }
  614. }
  615. template <>
  616. TUnboxedValue TSplit<TDatetime>::Run(
  617. const IValueBuilder* valueBuilder,
  618. const TUnboxedValuePod* args) const
  619. {
  620. try {
  621. EMPTY_RESULT_ON_EMPTY_ARG(0);
  622. auto& builder = valueBuilder->GetDateBuilder();
  623. TUnboxedValuePod result(0);
  624. auto& storage = Reference(result);
  625. storage.FromDatetime(builder, args[0].Get<ui32>());
  626. return result;
  627. } catch (const std::exception& e) {
  628. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  629. }
  630. }
  631. template <>
  632. TUnboxedValue TSplit<TDatetime64>::Run(
  633. const IValueBuilder* valueBuilder,
  634. const TUnboxedValuePod* args) const
  635. {
  636. try {
  637. EMPTY_RESULT_ON_EMPTY_ARG(0);
  638. TUnboxedValuePod result(0);
  639. auto& storage = Reference64(result);
  640. storage.FromDatetime64(valueBuilder->GetDateBuilder(), args[0].Get<i64>());
  641. return result;
  642. } catch (const std::exception& e) {
  643. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  644. }
  645. }
  646. template <>
  647. TUnboxedValue TSplit<TTimestamp>::Run(
  648. const IValueBuilder* valueBuilder,
  649. const TUnboxedValuePod* args) const
  650. {
  651. try {
  652. EMPTY_RESULT_ON_EMPTY_ARG(0);
  653. auto& builder = valueBuilder->GetDateBuilder();
  654. TUnboxedValuePod result(0);
  655. auto& storage = Reference(result);
  656. storage.FromTimestamp(builder, args[0].Get<ui64>());
  657. return result;
  658. } catch (const std::exception& e) {
  659. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  660. }
  661. }
  662. template <>
  663. TUnboxedValue TSplit<TTimestamp64>::Run(
  664. const IValueBuilder* valueBuilder,
  665. const TUnboxedValuePod* args) const
  666. {
  667. try {
  668. EMPTY_RESULT_ON_EMPTY_ARG(0);
  669. TUnboxedValuePod result(0);
  670. auto& storage = Reference64(result);
  671. storage.FromTimestamp64(valueBuilder->GetDateBuilder(), args[0].Get<i64>());
  672. return result;
  673. } catch (const std::exception& e) {
  674. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  675. }
  676. }
  677. template <>
  678. TUnboxedValue TSplit<TTzDate>::Run(
  679. const IValueBuilder* valueBuilder,
  680. const TUnboxedValuePod* args) const
  681. {
  682. try {
  683. EMPTY_RESULT_ON_EMPTY_ARG(0);
  684. auto& builder = valueBuilder->GetDateBuilder();
  685. TUnboxedValuePod result(0);
  686. auto& storage = Reference(result);
  687. storage.FromDate(builder, args[0].Get<ui16>(), args[0].GetTimezoneId());
  688. return result;
  689. } catch (const std::exception& e) {
  690. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  691. }
  692. }
  693. template <>
  694. TUnboxedValue TSplit<TTzDatetime>::Run(
  695. const IValueBuilder* valueBuilder,
  696. const TUnboxedValuePod* args) const
  697. {
  698. try {
  699. EMPTY_RESULT_ON_EMPTY_ARG(0);
  700. auto& builder = valueBuilder->GetDateBuilder();
  701. TUnboxedValuePod result(0);
  702. auto& storage = Reference(result);
  703. storage.FromDatetime(builder, args[0].Get<ui32>(), args[0].GetTimezoneId());
  704. return result;
  705. } catch (const std::exception& e) {
  706. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  707. }
  708. }
  709. template <>
  710. TUnboxedValue TSplit<TTzTimestamp>::Run(
  711. const IValueBuilder* valueBuilder,
  712. const TUnboxedValuePod* args) const
  713. {
  714. try {
  715. EMPTY_RESULT_ON_EMPTY_ARG(0);
  716. auto& builder = valueBuilder->GetDateBuilder();
  717. TUnboxedValuePod result(0);
  718. auto& storage = Reference(result);
  719. storage.FromTimestamp(builder, args[0].Get<ui64>(), args[0].GetTimezoneId());
  720. return result;
  721. } catch (const std::exception& e) {
  722. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  723. }
  724. }
  725. // Make*
  726. template<typename TUserDataType, bool Nullable>
  727. using TMakeResBuilder = std::conditional_t<TTzDataType<TUserDataType>::Result,
  728. TTzDateArrayBuilder<TUserDataType, Nullable>,
  729. TFixedSizeArrayBuilder<typename TDataType<TUserDataType>::TLayout, Nullable>>;
  730. template<typename TUserDataType>
  731. struct TMakeDateKernelExec : TUnaryKernelExec<TMakeDateKernelExec<TUserDataType>, TReaderTraits::TResource<false>, TMakeResBuilder<TUserDataType, false>> {
  732. static TBlockItem Make(TTMStorage& storage, const IValueBuilder& valueBuilder);
  733. template<typename TSink>
  734. static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
  735. auto& storage = Reference(item);
  736. sink(TBlockItem(Make(storage, *valueBuilder)));
  737. }
  738. };
  739. template<> TBlockItem TMakeDateKernelExec<TDate>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
  740. TBlockItem res(storage.ToDate(valueBuilder.GetDateBuilder(), /*local*/ false));
  741. return res;
  742. }
  743. template<> TBlockItem TMakeDateKernelExec<TDatetime>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
  744. TBlockItem res(storage.ToDatetime(valueBuilder.GetDateBuilder()));
  745. return res;
  746. }
  747. template<> TBlockItem TMakeDateKernelExec<TTimestamp>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
  748. TBlockItem res(storage.ToTimestamp(valueBuilder.GetDateBuilder()));
  749. return res;
  750. }
  751. template<> TBlockItem TMakeDateKernelExec<TTzDate>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
  752. TBlockItem res(storage.ToDate(valueBuilder.GetDateBuilder(), /*local*/ true));
  753. res.SetTimezoneId(storage.TimezoneId);
  754. return res;
  755. }
  756. template<> TBlockItem TMakeDateKernelExec<TTzDatetime>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
  757. TBlockItem res(storage.ToDatetime(valueBuilder.GetDateBuilder()));
  758. res.SetTimezoneId(storage.TimezoneId);
  759. return res;
  760. }
  761. template<> TBlockItem TMakeDateKernelExec<TTzTimestamp>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
  762. TBlockItem res(storage.ToTimestamp(valueBuilder.GetDateBuilder()));
  763. res.SetTimezoneId(storage.TimezoneId);
  764. return res;
  765. }
  766. BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeDate, TDate(TAutoMap<TResource<TMResourceName>>)) {
  767. auto& builder = valueBuilder->GetDateBuilder();
  768. auto& storage = Reference(args[0]);
  769. return TUnboxedValuePod(storage.ToDate(builder, false));
  770. }
  771. END_SIMPLE_ARROW_UDF(TMakeDate, TMakeDateKernelExec<TDate>::Do);
  772. BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeDatetime, TDatetime(TAutoMap<TResource<TMResourceName>>)) {
  773. auto& builder = valueBuilder->GetDateBuilder();
  774. auto& storage = Reference(args[0]);
  775. return TUnboxedValuePod(storage.ToDatetime(builder));
  776. }
  777. END_SIMPLE_ARROW_UDF(TMakeDatetime, TMakeDateKernelExec<TDatetime>::Do);
  778. BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTimestamp, TTimestamp(TAutoMap<TResource<TMResourceName>>)) {
  779. auto& builder = valueBuilder->GetDateBuilder();
  780. auto& storage = Reference(args[0]);
  781. return TUnboxedValuePod(storage.ToTimestamp(builder));
  782. }
  783. END_SIMPLE_ARROW_UDF(TMakeTimestamp, TMakeDateKernelExec<TTimestamp>::Do);
  784. BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzDate, TTzDate(TAutoMap<TResource<TMResourceName>>)) {
  785. auto& builder = valueBuilder->GetDateBuilder();
  786. auto& storage = Reference(args[0]);
  787. try {
  788. TUnboxedValuePod result(storage.ToDate(builder, true));
  789. result.SetTimezoneId(storage.TimezoneId);
  790. return result;
  791. } catch (const std::exception& e) {
  792. UdfTerminate((TStringBuilder() << Pos_ << "Timestamp "
  793. << storage.ToString()
  794. << " cannot be casted to TzDate"
  795. ).data());
  796. }
  797. }
  798. END_SIMPLE_ARROW_UDF(TMakeTzDate, TMakeDateKernelExec<TTzDate>::Do);
  799. BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzDatetime, TTzDatetime(TAutoMap<TResource<TMResourceName>>)) {
  800. auto& builder = valueBuilder->GetDateBuilder();
  801. auto& storage = Reference(args[0]);
  802. TUnboxedValuePod result(storage.ToDatetime(builder));
  803. result.SetTimezoneId(storage.TimezoneId);
  804. return result;
  805. }
  806. END_SIMPLE_ARROW_UDF(TMakeTzDatetime, TMakeDateKernelExec<TTzDatetime>::Do);
  807. BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzTimestamp, TTzTimestamp(TAutoMap<TResource<TMResourceName>>)) {
  808. auto& builder = valueBuilder->GetDateBuilder();
  809. auto& storage = Reference(args[0]);
  810. TUnboxedValuePod result(storage.ToTimestamp(builder));
  811. result.SetTimezoneId(storage.TimezoneId);
  812. return result;
  813. }
  814. END_SIMPLE_ARROW_UDF(TMakeTzTimestamp, TMakeDateKernelExec<TTzTimestamp>::Do);
  815. SIMPLE_STRICT_UDF(TConvert, TResource<TM64ResourceName>(TAutoMap<TResource<TMResourceName>>)) {
  816. Y_UNUSED(valueBuilder);
  817. TUnboxedValuePod result(0);
  818. auto& arg = Reference(args[0]);
  819. auto& storage = Reference64(result);
  820. storage.From(arg);
  821. return result;
  822. }
  823. SIMPLE_STRICT_UDF(TMakeDate32, TDate32(TAutoMap<TResource<TM64ResourceName>>)) {
  824. auto& storage = Reference64(args[0]);
  825. return TUnboxedValuePod(storage.ToDate32(valueBuilder->GetDateBuilder()));
  826. }
  827. SIMPLE_STRICT_UDF(TMakeDatetime64, TDatetime64(TAutoMap<TResource<TM64ResourceName>>)) {
  828. auto& storage = Reference64(args[0]);
  829. return TUnboxedValuePod(storage.ToDatetime64(valueBuilder->GetDateBuilder()));
  830. }
  831. SIMPLE_STRICT_UDF(TMakeTimestamp64, TTimestamp64(TAutoMap<TResource<TM64ResourceName>>)) {
  832. auto& storage = Reference64(args[0]);
  833. return TUnboxedValuePod(storage.ToTimestamp64(valueBuilder->GetDateBuilder()));
  834. }
  835. // Get*
  836. #define GET_METHOD(field, type) \
  837. SIMPLE_STRICT_UDF(TGet##field, type(TAutoMap<TResource<TMResourceName>>)) { \
  838. Y_UNUSED(valueBuilder); \
  839. return TUnboxedValuePod(Get##field(args[0])); \
  840. }
  841. // #define GET_METHOD(field, type) \
  842. // struct TGet##field##KernelExec : TUnaryKernelExec<TGet##field##KernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<type, false>> { \
  843. // template<typename TSink> \
  844. // static void Process(TBlockItem item, const IValueBuilder& valueBuilder, const TSink& sink) { \
  845. // Y_UNUSED(valueBuilder); \
  846. // sink(TBlockItem(Get##field(item))); \
  847. // } \
  848. // }; \
  849. // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGet##field, type(TAutoMap<TResource<TMResourceName>>)) { \
  850. // Y_UNUSED(valueBuilder); \
  851. // return TUnboxedValuePod(Get##field(args[0])); \
  852. // } \
  853. // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGet##field, TGet##field##KernelExec::Do, arrow::compute::NullHandling::INTERSECTION);
// Scalar getters generated by GET_METHOD: TGetYear, TGetDayOfYear, TGetMonth.
GET_METHOD(Year, ui16)
GET_METHOD(DayOfYear, ui16)
GET_METHOD(Month, ui8)
  857. // template<typename TValue>
  858. // TValue GetMonthNameValue(size_t idx) {
  859. // static const std::array<TValue, 12U> monthNames = {{
  860. // TValue::Embedded(TStringRef::Of("January")),
  861. // TValue::Embedded(TStringRef::Of("February")),
  862. // TValue::Embedded(TStringRef::Of("March")),
  863. // TValue::Embedded(TStringRef::Of("April")),
  864. // TValue::Embedded(TStringRef::Of("May")),
  865. // TValue::Embedded(TStringRef::Of("June")),
  866. // TValue::Embedded(TStringRef::Of("July")),
  867. // TValue::Embedded(TStringRef::Of("August")),
  868. // TValue::Embedded(TStringRef::Of("September")),
  869. // TValue::Embedded(TStringRef::Of("October")),
  870. // TValue::Embedded(TStringRef::Of("November")),
  871. // TValue::Embedded(TStringRef::Of("December"))
  872. // }};
  873. // return monthNames.at(idx);
  874. // }
  875. // struct TGetMonthNameKernelExec : TUnaryKernelExec<TGetMonthNameKernelExec, TReaderTraits::TResource<true>, TStringArrayBuilder<arrow::StringType, false>> {
  876. // template<typename TSink>
  877. // static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
  878. // Y_UNUSED(valueBuilder);
  879. // sink(GetMonthNameValue<TBlockItem>(GetMonth(item) - 1U));
  880. // }
  881. // };
  882. // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetMonthName, char*(TAutoMap<TResource<TMResourceName>>)) {
  883. // Y_UNUSED(valueBuilder);
  884. // return GetMonthNameValue<TUnboxedValue>(GetMonth(*args) - 1U);
  885. // }
  886. // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetMonthName, TGetMonthNameKernelExec::Do, arrow::compute::NullHandling::INTERSECTION);
  887. SIMPLE_STRICT_UDF(TGetMonthName, char*(TAutoMap<TResource<TMResourceName>>)) {
  888. Y_UNUSED(valueBuilder);
  889. static const std::array<TUnboxedValue, 12U> monthNames = {{
  890. TUnboxedValuePod::Embedded(TStringRef::Of("January")),
  891. TUnboxedValuePod::Embedded(TStringRef::Of("February")),
  892. TUnboxedValuePod::Embedded(TStringRef::Of("March")),
  893. TUnboxedValuePod::Embedded(TStringRef::Of("April")),
  894. TUnboxedValuePod::Embedded(TStringRef::Of("May")),
  895. TUnboxedValuePod::Embedded(TStringRef::Of("June")),
  896. TUnboxedValuePod::Embedded(TStringRef::Of("July")),
  897. TUnboxedValuePod::Embedded(TStringRef::Of("August")),
  898. TUnboxedValuePod::Embedded(TStringRef::Of("September")),
  899. TUnboxedValuePod::Embedded(TStringRef::Of("October")),
  900. TUnboxedValuePod::Embedded(TStringRef::Of("November")),
  901. TUnboxedValuePod::Embedded(TStringRef::Of("December"))
  902. }};
  903. return monthNames.at(GetMonth(*args) - 1U);
  904. }
// Scalar getters generated by GET_METHOD: TGetWeekOfYear, TGetWeekOfYearIso8601.
GET_METHOD(WeekOfYear, ui8)
GET_METHOD(WeekOfYearIso8601, ui8)
  907. // struct TGetDayOfMonthKernelExec : TUnaryKernelExec<TGetMonthNameKernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<ui8, false>> {
  908. // template<typename TSink>
  909. // static void Process(TBlockItem item, const TSink& sink) {
  910. // sink(GetDay(item));
  911. // }
  912. // };
  913. // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetDayOfMonth, ui8(TAutoMap<TResource<TMResourceName>>)) {
  914. // Y_UNUSED(valueBuilder);
  915. // return TUnboxedValuePod(GetDay(args[0]));
  916. // }
  917. // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetDayOfMonth, TGetDayOfMonthKernelExec::Do, arrow::compute::NullHandling::INTERSECTION);
  918. SIMPLE_STRICT_UDF(TGetDayOfMonth, ui8(TAutoMap<TResource<TMResourceName>>)) {
  919. Y_UNUSED(valueBuilder);
  920. return TUnboxedValuePod(GetDay(args[0]));
  921. }
// Scalar getter generated by GET_METHOD: TGetDayOfWeek.
GET_METHOD(DayOfWeek, ui8)
  923. template<typename TValue>
  924. TValue GetDayNameValue(size_t idx) {
  925. static const std::array<TValue, 7U> dayNames = {{
  926. TValue::Embedded(TStringRef::Of("Monday")),
  927. TValue::Embedded(TStringRef::Of("Tuesday")),
  928. TValue::Embedded(TStringRef::Of("Wednesday")),
  929. TValue::Embedded(TStringRef::Of("Thursday")),
  930. TValue::Embedded(TStringRef::Of("Friday")),
  931. TValue::Embedded(TStringRef::Of("Saturday")),
  932. TValue::Embedded(TStringRef::Of("Sunday"))
  933. }};
  934. return dayNames.at(idx);
  935. }
  936. SIMPLE_STRICT_UDF(TGetDayOfWeekName, char*(TAutoMap<TResource<TMResourceName>>)) {
  937. Y_UNUSED(valueBuilder);
  938. return GetDayNameValue<TUnboxedValuePod>(GetDayOfWeek(*args) - 1U);
  939. }
  940. // struct TGetDayOfWeekNameKernelExec : TUnaryKernelExec<TGetDayOfWeekNameKernelExec, TReaderTraits::TResource<true>, TStringArrayBuilder<arrow::StringType, false>> {
  941. // template<typename TSink>
  942. // static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
  943. // Y_UNUSED(valueBuilder);
  944. // sink(GetDayNameValue<TBlockItem>(GetDayOfWeek(item) - 1U));
  945. // }
  946. // };
  947. // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetDayOfWeekName, char*(TAutoMap<TResource<TMResourceName>>)) {
  948. // Y_UNUSED(valueBuilder);
  949. // return GetDayNameValue<TUnboxedValuePod>(GetDayOfWeek(*args) - 1U);
  950. // }
  951. // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetDayOfWeekName, TGetDayOfWeekNameKernelExec::Do, arrow::compute::NullHandling::INTERSECTION);
// Scalar getter generated by GET_METHOD: TGetTimezoneId.
GET_METHOD(TimezoneId, ui16)
  953. struct TTGetTimezoneNameKernelExec : TUnaryKernelExec<TTGetTimezoneNameKernelExec, TReaderTraits::TResource<false>, TStringArrayBuilder<arrow::BinaryType, false>> {
  954. template<typename TSink>
  955. static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
  956. Y_UNUSED(valueBuilder);
  957. auto timezoneId = GetTimezoneId(item);
  958. if (timezoneId >= NUdf::GetTimezones().size()) {
  959. sink(TBlockItem{});
  960. } else {
  961. sink(TBlockItem{NUdf::GetTimezones()[timezoneId]});
  962. }
  963. }
  964. };
  965. BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetTimezoneName, char*(TAutoMap<TResource<TMResourceName>>)) {
  966. auto timezoneId = GetTimezoneId(args[0]);
  967. if (timezoneId >= NUdf::GetTimezones().size()) {
  968. return TUnboxedValuePod();
  969. }
  970. return valueBuilder->NewString(NUdf::GetTimezones()[timezoneId]);
  971. }
  972. END_SIMPLE_ARROW_UDF(TGetTimezoneName, TTGetTimezoneNameKernelExec::Do);
  973. // Update
  974. class TUpdate : public TBoxedValue {
  975. const TSourcePosition Pos_;
  976. public:
  977. explicit TUpdate(TSourcePosition pos)
  978. : Pos_(pos)
  979. {}
  980. TUnboxedValue Run(
  981. const IValueBuilder* valueBuilder,
  982. const TUnboxedValuePod* args) const override
  983. {
  984. try {
  985. EMPTY_RESULT_ON_EMPTY_ARG(0);
  986. auto result = args[0];
  987. if (args[1]) {
  988. auto year = args[1].Get<ui16>();
  989. if (!ValidateYear(year)) {
  990. return TUnboxedValuePod();
  991. }
  992. SetYear(result, year);
  993. }
  994. if (args[2]) {
  995. auto month = args[2].Get<ui8>();
  996. if (!ValidateMonth(month)) {
  997. return TUnboxedValuePod();
  998. }
  999. SetMonth(result, month);
  1000. }
  1001. if (args[3]) {
  1002. auto day = args[3].Get<ui8>();
  1003. if (!ValidateDay(day)) {
  1004. return TUnboxedValuePod();
  1005. }
  1006. SetDay(result, day);
  1007. }
  1008. if (args[4]) {
  1009. auto hour = args[4].Get<ui8>();
  1010. if (!ValidateHour(hour)) {
  1011. return TUnboxedValuePod();
  1012. }
  1013. SetHour(result, hour);
  1014. }
  1015. if (args[5]) {
  1016. auto minute = args[5].Get<ui8>();
  1017. if (!ValidateMinute(minute)) {
  1018. return TUnboxedValuePod();
  1019. }
  1020. SetMinute(result, minute);
  1021. }
  1022. if (args[6]) {
  1023. auto second = args[6].Get<ui8>();
  1024. if (!ValidateSecond(second)) {
  1025. return TUnboxedValuePod();
  1026. }
  1027. SetSecond(result, second);
  1028. }
  1029. if (args[7]) {
  1030. auto microsecond = args[7].Get<ui32>();
  1031. if (!ValidateMicrosecond(microsecond)) {
  1032. return TUnboxedValuePod();
  1033. }
  1034. SetMicrosecond(result, microsecond);
  1035. }
  1036. if (args[8]) {
  1037. auto timezoneId = args[8].Get<ui16>();
  1038. if (!ValidateTimezoneId(timezoneId)) {
  1039. return TUnboxedValuePod();
  1040. }
  1041. SetTimezoneId(result, timezoneId);
  1042. }
  1043. auto& builder = valueBuilder->GetDateBuilder();
  1044. auto& storage = Reference(result);
  1045. if (!storage.Validate(builder)) {
  1046. return TUnboxedValuePod();
  1047. }
  1048. return result;
  1049. } catch (const std::exception& e) {
  1050. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  1051. }
  1052. }
  1053. static const TStringRef& Name() {
  1054. static auto name = TStringRef::Of("Update");
  1055. return name;
  1056. }
  1057. static bool DeclareSignature(
  1058. const TStringRef& name,
  1059. TType*,
  1060. IFunctionTypeInfoBuilder& builder,
  1061. bool typesOnly)
  1062. {
  1063. if (Name() != name) {
  1064. return false;
  1065. }
  1066. auto resourceType = builder.Resource(TMResourceName);
  1067. auto optionalResourceType = builder.Optional()->Item(resourceType).Build();
  1068. builder.OptionalArgs(8).Args()->Add(resourceType).Flags(ICallablePayload::TArgumentFlags::AutoMap)
  1069. .Add(builder.Optional()->Item<ui16>().Build()).Name("Year")
  1070. .Add(builder.Optional()->Item<ui8>().Build()).Name("Month")
  1071. .Add(builder.Optional()->Item<ui8>().Build()).Name("Day")
  1072. .Add(builder.Optional()->Item<ui8>().Build()).Name("Hour")
  1073. .Add(builder.Optional()->Item<ui8>().Build()).Name("Minute")
  1074. .Add(builder.Optional()->Item<ui8>().Build()).Name("Second")
  1075. .Add(builder.Optional()->Item<ui32>().Build()).Name("Microsecond")
  1076. .Add(builder.Optional()->Item<ui16>().Build()).Name("TimezoneId");
  1077. builder.Returns(optionalResourceType);
  1078. if (!typesOnly) {
  1079. builder.Implementation(new TUpdate(builder.GetSourcePosition()));
  1080. }
  1081. builder.IsStrict();
  1082. return true;
  1083. }
  1084. };
  1085. // From*
  1086. BEGIN_SIMPLE_STRICT_ARROW_UDF(TFromSeconds, TOptional<TTimestamp>(TAutoMap<ui32>)) {
  1087. Y_UNUSED(valueBuilder);
  1088. auto res = args[0].Get<ui32>();
  1089. if (!ValidateDatetime(res)) {
  1090. return TUnboxedValuePod();
  1091. }
  1092. return TUnboxedValuePod((ui64)(res * 1000000ull));
  1093. }
  1094. using TFromSecondsKernel = TUnaryUnsafeFixedSizeFilterKernel<ui32, ui64,
  1095. [] (ui32 seconds) { return std::make_pair(ui64(seconds * 1000000ull), ValidateDatetime(seconds)); }>;
  1096. END_SIMPLE_ARROW_UDF(TFromSeconds, TFromSecondsKernel::Do);
  1097. BEGIN_SIMPLE_STRICT_ARROW_UDF(TFromMilliseconds, TOptional<TTimestamp>(TAutoMap<ui64>)) {
  1098. Y_UNUSED(valueBuilder);
  1099. auto res = args[0].Get<ui64>();
  1100. if (res >= MAX_TIMESTAMP / 1000u) {
  1101. return TUnboxedValuePod();
  1102. }
  1103. return TUnboxedValuePod(res * 1000u);
  1104. }
  1105. using TFromMillisecondsKernel = TUnaryUnsafeFixedSizeFilterKernel<ui64, ui64,
  1106. [] (ui64 milliseconds) { return std::make_pair(ui64(milliseconds * 1000u), milliseconds < MAX_TIMESTAMP / 1000u); }>;
  1107. END_SIMPLE_ARROW_UDF(TFromMilliseconds, TFromMillisecondsKernel::Do);
  1108. BEGIN_SIMPLE_STRICT_ARROW_UDF(TFromMicroseconds, TOptional<TTimestamp>(TAutoMap<ui64>)) {
  1109. Y_UNUSED(valueBuilder);
  1110. auto res = args[0].Get<ui64>();
  1111. if (!ValidateTimestamp(res)) {
  1112. return TUnboxedValuePod();
  1113. }
  1114. return TUnboxedValuePod(res);
  1115. }
  1116. using TFromMicrosecondsKernel = TUnaryUnsafeFixedSizeFilterKernel<ui64, ui64,
  1117. [] (ui64 timestamp) { return std::make_pair(timestamp, ValidateTimestamp(timestamp)); }>;
  1118. END_SIMPLE_ARROW_UDF(TFromMicroseconds, TFromMicrosecondsKernel::Do);
  1119. template <typename TInput, i64 Multiplier>
  1120. using TIntervalFromKernel = TUnaryUnsafeFixedSizeFilterKernel<TInput, i64,
  1121. [] (TInput interval) { return std::make_pair(i64(interval * Multiplier), ValidateInterval(interval)); }>;
  1122. BEGIN_SIMPLE_STRICT_ARROW_UDF(TIntervalFromDays, TOptional<TInterval>(TAutoMap<i32>)) {
  1123. Y_UNUSED(valueBuilder);
  1124. const i64 res = i64(args[0].Get<i32>()) * UsecondsInDay;
  1125. return ValidateInterval(res) ? TUnboxedValuePod(res) : TUnboxedValuePod();
  1126. }
  1127. END_SIMPLE_ARROW_UDF(TIntervalFromDays, (TIntervalFromKernel<i32, UsecondsInDay>::Do));
  1128. BEGIN_SIMPLE_STRICT_ARROW_UDF(TIntervalFromHours, TOptional<TInterval>(TAutoMap<i32>)) {
  1129. Y_UNUSED(valueBuilder);
  1130. const i64 res = i64(args[0].Get<i32>()) * UsecondsInHour;
  1131. return ValidateInterval(res) ? TUnboxedValuePod(res) : TUnboxedValuePod();
  1132. }
  1133. END_SIMPLE_ARROW_UDF(TIntervalFromHours, (TIntervalFromKernel<i32, UsecondsInHour>::Do));
  1134. BEGIN_SIMPLE_STRICT_ARROW_UDF(TIntervalFromMinutes, TOptional<TInterval>(TAutoMap<i32>)) {
  1135. Y_UNUSED(valueBuilder);
  1136. const i64 res = i64(args[0].Get<i32>()) * UsecondsInMinute;
  1137. return ValidateInterval(res) ? TUnboxedValuePod(res) : TUnboxedValuePod();
  1138. }
  1139. END_SIMPLE_ARROW_UDF(TIntervalFromMinutes, (TIntervalFromKernel<i32, UsecondsInMinute>::Do));
  1140. BEGIN_SIMPLE_STRICT_ARROW_UDF(TIntervalFromSeconds, TOptional<TInterval>(TAutoMap<i32>)) {
  1141. Y_UNUSED(valueBuilder);
  1142. const i64 res = i64(args[0].Get<i32>()) * UsecondsInSecond;
  1143. return ValidateInterval(res) ? TUnboxedValuePod(res) : TUnboxedValuePod();
  1144. }
  1145. END_SIMPLE_ARROW_UDF(TIntervalFromSeconds, (TIntervalFromKernel<i32, UsecondsInSecond>::Do));
  1146. BEGIN_SIMPLE_STRICT_ARROW_UDF(TIntervalFromMilliseconds, TOptional<TInterval>(TAutoMap<i64>)) {
  1147. Y_UNUSED(valueBuilder);
  1148. const i64 res = i64(args[0].Get<i64>()) * UsecondsInMilliseconds;
  1149. return ValidateInterval(res) ? TUnboxedValuePod(res) : TUnboxedValuePod();
  1150. }
  1151. END_SIMPLE_ARROW_UDF(TIntervalFromMilliseconds, (TIntervalFromKernel<i64, UsecondsInMilliseconds>::Do));
  1152. BEGIN_SIMPLE_STRICT_ARROW_UDF(TIntervalFromMicroseconds, TOptional<TInterval>(TAutoMap<i64>)) {
  1153. Y_UNUSED(valueBuilder);
  1154. const i64 res = args[0].Get<i64>();
  1155. return ValidateInterval(res) ? TUnboxedValuePod(res) : TUnboxedValuePod();
  1156. }
  1157. END_SIMPLE_ARROW_UDF(TIntervalFromMicroseconds, (TIntervalFromKernel<i64, 1>::Do));
  1158. // To*
  1159. BEGIN_SIMPLE_STRICT_ARROW_UDF(TToDays, i32(TAutoMap<TInterval>)) {
  1160. Y_UNUSED(valueBuilder);
  1161. return TUnboxedValuePod(i32(args[0].Get<i64>() / UsecondsInDay));
  1162. }
  1163. END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TToDays,
  1164. (UnaryPreallocatedExecImpl<i64, i32, [] (i64 arg) { return i32(arg / UsecondsInDay); }>),
  1165. arrow::compute::NullHandling::INTERSECTION);
  1166. BEGIN_SIMPLE_STRICT_ARROW_UDF(TToHours, i32(TAutoMap<TInterval>)) {
  1167. Y_UNUSED(valueBuilder);
  1168. return TUnboxedValuePod(i32(args[0].Get<i64>() / UsecondsInHour));
  1169. }
  1170. END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TToHours,
  1171. (UnaryPreallocatedExecImpl<i64, i32, [] (i64 arg) { return i32(arg / UsecondsInHour); }>),
  1172. arrow::compute::NullHandling::INTERSECTION);
  1173. BEGIN_SIMPLE_STRICT_ARROW_UDF(TToMinutes, i32(TAutoMap<TInterval>)) {
  1174. Y_UNUSED(valueBuilder);
  1175. return TUnboxedValuePod(i32(args[0].Get<i64>() / UsecondsInMinute));
  1176. }
  1177. END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TToMinutes,
  1178. (UnaryPreallocatedExecImpl<i64, i32, [] (i64 arg) { return i32(arg / UsecondsInMinute); }>),
  1179. arrow::compute::NullHandling::INTERSECTION);
  1180. // StartOf*
  1181. template<auto Core>
  1182. struct TStartOfKernelExec : TUnaryKernelExec<TStartOfKernelExec<Core>, TResourceBlockReader<false>, TResourceArrayBuilder<true>> {
  1183. template<typename TSink>
  1184. static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
  1185. if (auto res = Core(Reference(item), *valueBuilder)) {
  1186. Reference(item) = res.GetRef();
  1187. sink(item);
  1188. } else {
  1189. sink(TBlockItem{});
  1190. }
  1191. }
  1192. };
  1193. template<auto Core>
  1194. TUnboxedValue SimpleDatetimeToDatetimeUdf(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) {
  1195. auto result = args[0];
  1196. auto& storage = Reference(result);
  1197. if (auto res = Core(storage, *valueBuilder)) {
  1198. storage = res.GetRef();
  1199. return result;
  1200. }
  1201. return TUnboxedValuePod{};
  1202. }
  1203. TMaybe<TTMStorage> StartOfYear(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1204. storage.Month = 1;
  1205. storage.Day = 1;
  1206. storage.Hour = 0;
  1207. storage.Minute = 0;
  1208. storage.Second = 0;
  1209. storage.Microsecond = 0;
  1210. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1211. return {};
  1212. }
  1213. return storage;
  1214. }
  1215. BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfYear, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
  1216. return SimpleDatetimeToDatetimeUdf<StartOfYear>(valueBuilder, args);
  1217. }
  1218. END_SIMPLE_ARROW_UDF(TStartOfYear, TStartOfKernelExec<StartOfYear>::Do);
  1219. void SetEndOfDay(TTMStorage& storage) {
  1220. storage.Hour = 23;
  1221. storage.Minute = 59;
  1222. storage.Second = 59;
  1223. storage.Microsecond = 999999;
  1224. }
  1225. TMaybe<TTMStorage> EndOfYear(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1226. storage.Month = 12;
  1227. storage.Day = 31;
  1228. SetEndOfDay(storage);
  1229. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1230. return {};
  1231. }
  1232. return storage;
  1233. }
  1234. BEGIN_SIMPLE_STRICT_ARROW_UDF(TEndOfYear, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
  1235. return SimpleDatetimeToDatetimeUdf<EndOfYear>(valueBuilder, args);
  1236. }
  1237. END_SIMPLE_ARROW_UDF(TEndOfYear, TStartOfKernelExec<EndOfYear>::Do);
  1238. TMaybe<TTMStorage> StartOfQuarter(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1239. storage.Month = (storage.Month - 1) / 3 * 3 + 1;
  1240. storage.Day = 1;
  1241. storage.Hour = 0;
  1242. storage.Minute = 0;
  1243. storage.Second = 0;
  1244. storage.Microsecond = 0;
  1245. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1246. return {};
  1247. }
  1248. return storage;
  1249. }
  1250. BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfQuarter, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
  1251. return SimpleDatetimeToDatetimeUdf<StartOfQuarter>(valueBuilder, args);
  1252. }
  1253. END_SIMPLE_ARROW_UDF(TStartOfQuarter, TStartOfKernelExec<StartOfQuarter>::Do);
  1254. TMaybe<TTMStorage> EndOfQuarter(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1255. storage.Month = ((storage.Month - 1) / 3 + 1) * 3;
  1256. storage.Day = NMiniKQL::GetMonthLength(storage.Month, NMiniKQL::IsLeapYear(storage.Year));
  1257. SetEndOfDay(storage);
  1258. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1259. return {};
  1260. }
  1261. return storage;
  1262. }
  1263. BEGIN_SIMPLE_STRICT_ARROW_UDF(TEndOfQuarter, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
  1264. return SimpleDatetimeToDatetimeUdf<EndOfQuarter>(valueBuilder, args);
  1265. }
  1266. END_SIMPLE_ARROW_UDF(TEndOfQuarter, TStartOfKernelExec<EndOfQuarter>::Do);
  1267. TMaybe<TTMStorage> StartOfMonth(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1268. storage.Day = 1;
  1269. storage.Hour = 0;
  1270. storage.Minute = 0;
  1271. storage.Second = 0;
  1272. storage.Microsecond = 0;
  1273. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1274. return {};
  1275. }
  1276. return storage;
  1277. }
// Declares the TStartOfMonth UDF (auto-mapped TM resource -> optional TM resource).
// The body forwards to SimpleDatetimeToDatetimeUdf<StartOfMonth>, and
// TStartOfKernelExec<StartOfMonth>::Do is handed to END_SIMPLE_ARROW_UDF as the kernel entry point.
BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfMonth, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
    return SimpleDatetimeToDatetimeUdf<StartOfMonth>(valueBuilder, args);
}
END_SIMPLE_ARROW_UDF(TStartOfMonth, TStartOfKernelExec<StartOfMonth>::Do);
  1282. TMaybe<TTMStorage> EndOfMonth(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1283. storage.Day = NMiniKQL::GetMonthLength(storage.Month, NMiniKQL::IsLeapYear(storage.Year));
  1284. SetEndOfDay(storage);
  1285. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1286. return {};
  1287. }
  1288. return storage;
  1289. }
// Declares the TEndOfMonth UDF (auto-mapped TM resource -> optional TM resource).
// The body forwards to SimpleDatetimeToDatetimeUdf<EndOfMonth>, and
// TStartOfKernelExec<EndOfMonth>::Do is handed to END_SIMPLE_ARROW_UDF as the kernel entry point.
BEGIN_SIMPLE_STRICT_ARROW_UDF(TEndOfMonth, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
    return SimpleDatetimeToDatetimeUdf<EndOfMonth>(valueBuilder, args);
}
END_SIMPLE_ARROW_UDF(TEndOfMonth, TStartOfKernelExec<EndOfMonth>::Do);
  1294. TMaybe<TTMStorage> StartOfWeek(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1295. const ui32 shift = 86400u * (storage.DayOfWeek - 1u);
  1296. if (shift > storage.ToDatetime(valueBuilder.GetDateBuilder())) {
  1297. return {};
  1298. }
  1299. storage.FromDatetime(valueBuilder.GetDateBuilder(), storage.ToDatetime(valueBuilder.GetDateBuilder()) - shift, storage.TimezoneId);
  1300. storage.Hour = 0;
  1301. storage.Minute = 0;
  1302. storage.Second = 0;
  1303. storage.Microsecond = 0;
  1304. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1305. return {};
  1306. }
  1307. return storage;
  1308. }
// Declares the TStartOfWeek UDF (auto-mapped TM resource -> optional TM resource).
// The body forwards to SimpleDatetimeToDatetimeUdf<StartOfWeek>, and
// TStartOfKernelExec<StartOfWeek>::Do is handed to END_SIMPLE_ARROW_UDF as the kernel entry point.
BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfWeek, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
    return SimpleDatetimeToDatetimeUdf<StartOfWeek>(valueBuilder, args);
}
END_SIMPLE_ARROW_UDF(TStartOfWeek, TStartOfKernelExec<StartOfWeek>::Do);
  1313. TMaybe<TTMStorage> EndOfWeek(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1314. const ui32 shift = 86400u * (7u - storage.DayOfWeek);
  1315. auto dt = storage.ToDatetime(valueBuilder.GetDateBuilder());
  1316. if (NUdf::MAX_DATETIME - shift <= dt) {
  1317. return {};
  1318. }
  1319. storage.FromDatetime(valueBuilder.GetDateBuilder(), dt + shift, storage.TimezoneId);
  1320. SetEndOfDay(storage);
  1321. if (!storage.Validate(valueBuilder.GetDateBuilder())) {
  1322. return {};
  1323. }
  1324. return storage;
  1325. }
// Declares the TEndOfWeek UDF (auto-mapped TM resource -> optional TM resource).
// The body forwards to SimpleDatetimeToDatetimeUdf<EndOfWeek>, and
// TStartOfKernelExec<EndOfWeek>::Do is handed to END_SIMPLE_ARROW_UDF as the kernel entry point.
BEGIN_SIMPLE_STRICT_ARROW_UDF(TEndOfWeek, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
    return SimpleDatetimeToDatetimeUdf<EndOfWeek>(valueBuilder, args);
}
END_SIMPLE_ARROW_UDF(TEndOfWeek, TStartOfKernelExec<EndOfWeek>::Do);
  1330. TMaybe<TTMStorage> StartOfDay(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1331. storage.Hour = 0;
  1332. storage.Minute = 0;
  1333. storage.Second = 0;
  1334. storage.Microsecond = 0;
  1335. auto& builder = valueBuilder.GetDateBuilder();
  1336. if (!storage.Validate(builder)) {
  1337. return {};
  1338. }
  1339. return storage;
  1340. }
// Declares the TStartOfDay UDF (auto-mapped TM resource -> optional TM resource).
// The body forwards to SimpleDatetimeToDatetimeUdf<StartOfDay>, and
// TStartOfKernelExec<StartOfDay>::Do is handed to END_SIMPLE_ARROW_UDF as the kernel entry point.
BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfDay, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
    return SimpleDatetimeToDatetimeUdf<StartOfDay>(valueBuilder, args);
}
END_SIMPLE_ARROW_UDF(TStartOfDay, TStartOfKernelExec<StartOfDay>::Do);
  1345. TMaybe<TTMStorage> EndOfDay(TTMStorage storage, const IValueBuilder& valueBuilder) {
  1346. SetEndOfDay(storage);
  1347. auto& builder = valueBuilder.GetDateBuilder();
  1348. if (!storage.Validate(builder)) {
  1349. return {};
  1350. }
  1351. return storage;
  1352. }
// Declares the TEndOfDay UDF (auto-mapped TM resource -> optional TM resource).
// The body forwards to SimpleDatetimeToDatetimeUdf<EndOfDay>, and
// TStartOfKernelExec<EndOfDay>::Do is handed to END_SIMPLE_ARROW_UDF as the kernel entry point.
BEGIN_SIMPLE_STRICT_ARROW_UDF(TEndOfDay, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
    return SimpleDatetimeToDatetimeUdf<EndOfDay>(valueBuilder, args);
}
END_SIMPLE_ARROW_UDF(TEndOfDay, TStartOfKernelExec<EndOfDay>::Do);
  1357. TMaybe<TTMStorage> StartOf(TTMStorage storage, ui64 interval, const IValueBuilder& valueBuilder) {
  1358. if (interval >= 86400000000ull) {
  1359. // treat as StartOfDay
  1360. storage.Hour = 0;
  1361. storage.Minute = 0;
  1362. storage.Second = 0;
  1363. storage.Microsecond = 0;
  1364. } else {
  1365. auto current = storage.ToTimeOfDay();
  1366. auto rounded = current / interval * interval;
  1367. storage.FromTimeOfDay(rounded);
  1368. }
  1369. auto& builder = valueBuilder.GetDateBuilder();
  1370. if (!storage.Validate(builder)) {
  1371. return {};
  1372. }
  1373. return storage;
  1374. }
  1375. struct TStartOfBinaryKernelExec : TBinaryKernelExec<TStartOfBinaryKernelExec> {
  1376. template<typename TSink>
  1377. static void Process(const IValueBuilder* valueBuilder, TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
  1378. auto& storage = Reference(arg1);
  1379. ui64 interval = std::abs(arg2.Get<i64>());
  1380. if (interval == 0) {
  1381. sink(arg1);
  1382. return;
  1383. }
  1384. if (auto res = StartOf(storage, interval, *valueBuilder)) {
  1385. storage = res.GetRef();
  1386. sink(arg1);
  1387. } else {
  1388. sink(TBlockItem{});
  1389. }
  1390. }
  1391. };
  1392. BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOf, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>, TAutoMap<TInterval>)) {
  1393. auto result = args[0];
  1394. ui64 interval = std::abs(args[1].Get<i64>());
  1395. if (interval == 0) {
  1396. return result;
  1397. }
  1398. if (auto res = StartOf(Reference(result), interval, *valueBuilder)) {
  1399. Reference(result) = res.GetRef();
  1400. return result;
  1401. }
  1402. return TUnboxedValuePod{};
  1403. }
  1404. END_SIMPLE_ARROW_UDF(TStartOf, TStartOfBinaryKernelExec::Do);
  1405. struct TTimeOfDayKernelExec : TUnaryKernelExec<TTimeOfDayKernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<TDataType<TInterval>::TLayout, false>> {
  1406. template<typename TSink>
  1407. static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
  1408. Y_UNUSED(valueBuilder);
  1409. auto& storage = Reference(item);
  1410. sink(TBlockItem{(TDataType<TInterval>::TLayout)storage.ToTimeOfDay()});
  1411. }
  1412. };
  1413. const auto timeOfDayKernelExecDo = TTimeOfDayKernelExec::Do;
  1414. BEGIN_SIMPLE_STRICT_ARROW_UDF(TTimeOfDay, TInterval(TAutoMap<TResource<TMResourceName>>)) {
  1415. Y_UNUSED(valueBuilder);
  1416. auto& storage = Reference(args[0]);
  1417. return TUnboxedValuePod((i64)storage.ToTimeOfDay());
  1418. }
  1419. END_SIMPLE_ARROW_UDF(TTimeOfDay, timeOfDayKernelExecDo);
  1420. // Add ...
  1421. template<auto Core>
  1422. struct TAddKernelExec : TBinaryKernelExec<TAddKernelExec<Core>> {
  1423. template<typename TSink>
  1424. static void Process(const IValueBuilder* valueBuilder, TBlockItem date, TBlockItem arg, const TSink& sink) {
  1425. sink(Core(date, arg.Get<i32>(), valueBuilder->GetDateBuilder()));
  1426. }
  1427. };
// Declares the TShiftYears UDF: (TM resource, i32) -> optional TM resource.
// The body returns DoAddYears() applied to the resource, the i32 argument and the date builder;
// TAddKernelExec<DoAddYears<TBlockItem>>::Do is handed to END_SIMPLE_ARROW_UDF as the kernel entry point.
// NOTE(review): the i32 is presumably a signed number of years to add — confirm against DoAddYears.
BEGIN_SIMPLE_STRICT_ARROW_UDF(TShiftYears, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>, i32)) {
    return DoAddYears(args[0], args[1].Get<i32>(), valueBuilder->GetDateBuilder());
}
END_SIMPLE_ARROW_UDF(TShiftYears, TAddKernelExec<DoAddYears<TBlockItem>>::Do);
// Declares the TShiftQuarters UDF: (TM resource, i32) -> optional TM resource.
// The body returns DoAddQuarters() applied to the resource, the i32 argument and the date builder;
// TAddKernelExec<DoAddQuarters<TBlockItem>>::Do is handed to END_SIMPLE_ARROW_UDF as the kernel entry point.
// NOTE(review): the i32 is presumably a signed number of quarters to add — confirm against DoAddQuarters.
BEGIN_SIMPLE_STRICT_ARROW_UDF(TShiftQuarters, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>, i32)) {
    return DoAddQuarters(args[0], args[1].Get<i32>(), valueBuilder->GetDateBuilder());
}
END_SIMPLE_ARROW_UDF(TShiftQuarters, TAddKernelExec<DoAddQuarters<TBlockItem>>::Do);
// Declares the TShiftMonths UDF: (TM resource, i32) -> optional TM resource.
// The body returns DoAddMonths() applied to the resource, the i32 argument and the date builder;
// TAddKernelExec<DoAddMonths<TBlockItem>>::Do is handed to END_SIMPLE_ARROW_UDF as the kernel entry point.
// NOTE(review): the i32 is presumably a signed number of months to add — confirm against DoAddMonths.
BEGIN_SIMPLE_STRICT_ARROW_UDF(TShiftMonths, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>, i32)) {
    return DoAddMonths(args[0], args[1].Get<i32>(), valueBuilder->GetDateBuilder());
}
END_SIMPLE_ARROW_UDF(TShiftMonths, TAddKernelExec<DoAddMonths<TBlockItem>>::Do);
  1440. template<size_t Digits, bool Exacly = true>
  1441. struct PrintNDigits;
  1442. template<bool Exacly>
  1443. struct PrintNDigits<0U, Exacly> {
  1444. static constexpr ui32 Miltiplier = 1U;
  1445. template <typename T>
  1446. static constexpr size_t Do(T, char*) { return 0U; }
  1447. };
  1448. template<size_t Digits, bool Exacly>
  1449. struct PrintNDigits {
  1450. using TNextPrint = PrintNDigits<Digits - 1U, Exacly>;
  1451. static constexpr ui32 Miltiplier = TNextPrint::Miltiplier * 10U;
  1452. template <typename T>
  1453. static constexpr size_t Do(T in, char* out) {
  1454. in %= Miltiplier;
  1455. if (Exacly || in) {
  1456. *out = "0123456789"[in / TNextPrint::Miltiplier];
  1457. return 1U + TNextPrint::Do(in, ++out);
  1458. }
  1459. return 0U;
  1460. }
  1461. };
  1462. // Format
  1463. class TFormat : public TBoxedValue {
  1464. public:
  1465. explicit TFormat(TSourcePosition pos)
  1466. : Pos_(pos)
  1467. {}
  1468. static const TStringRef& Name() {
  1469. static auto name = TStringRef::Of("Format");
  1470. return name;
  1471. }
  1472. static bool DeclareSignature(
  1473. const TStringRef& name,
  1474. TType*,
  1475. IFunctionTypeInfoBuilder& builder,
  1476. bool typesOnly)
  1477. {
  1478. if (Name() != name) {
  1479. return false;
  1480. }
  1481. auto resourceType = builder.Resource(TMResourceName);
  1482. auto stringType = builder.SimpleType<char*>();
  1483. auto boolType = builder.SimpleType<bool>();
  1484. auto optionalBoolType = builder.Optional()->Item(boolType).Build();
  1485. auto args = builder.Args();
  1486. args->Add(stringType);
  1487. args->Add(optionalBoolType).Name("AlwaysWriteFractionalSeconds");
  1488. args->Done();
  1489. builder.OptionalArgs(1);
  1490. builder.Returns(
  1491. builder.Callable(1)
  1492. ->Returns(stringType)
  1493. .Arg(resourceType)
  1494. .Flags(ICallablePayload::TArgumentFlags::AutoMap)
  1495. .Build()
  1496. );
  1497. if (!typesOnly) {
  1498. builder.Implementation(new TFormat(builder.GetSourcePosition()));
  1499. }
  1500. return true;
  1501. }
  1502. private:
  1503. using TPrintersList = std::vector<std::function<size_t(char*, const TUnboxedValuePod&, const IDateBuilder&)>>;
  1504. struct TDataPrinter {
  1505. const std::string_view Data;
  1506. size_t operator()(char* out, const TUnboxedValuePod&, const IDateBuilder&) const {
  1507. std::memcpy(out, Data.data(), Data.size());
  1508. return Data.size();
  1509. }
  1510. };
  1511. TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try {
  1512. bool alwaysWriteFractionalSeconds = false;
  1513. if (auto val = args[1]) {
  1514. alwaysWriteFractionalSeconds = val.Get<bool>();
  1515. }
  1516. return TUnboxedValuePod(new TImpl(Pos_, args[0], alwaysWriteFractionalSeconds));
  1517. } catch (const std::exception& e) {
  1518. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  1519. }
  1520. class TImpl : public TBoxedValue {
  1521. public:
  1522. TUnboxedValue Run(
  1523. const IValueBuilder* valueBuilder,
  1524. const TUnboxedValuePod* args) const override
  1525. {
  1526. try {
  1527. EMPTY_RESULT_ON_EMPTY_ARG(0);
  1528. const auto value = args[0];
  1529. auto& builder = valueBuilder->GetDateBuilder();
  1530. auto result = valueBuilder->NewStringNotFilled(ReservedSize_);
  1531. auto pos = result.AsStringRef().Data();
  1532. ui32 size = 0U;
  1533. for (const auto& printer : Printers_) {
  1534. if (const auto plus = printer(pos, value, builder)) {
  1535. size += plus;
  1536. pos += plus;
  1537. }
  1538. }
  1539. if (size < ReservedSize_) {
  1540. result = valueBuilder->SubString(result.Release(), 0U, size);
  1541. }
  1542. return result;
  1543. } catch (const std::exception& e) {
  1544. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  1545. }
  1546. }
  1547. TImpl(TSourcePosition pos, TUnboxedValue format, bool alwaysWriteFractionalSeconds)
  1548. : Pos_(pos)
  1549. , Format_(format)
  1550. {
  1551. const std::string_view formatView(Format_.AsStringRef());
  1552. auto dataStart = formatView.begin();
  1553. size_t dataSize = 0U;
  1554. for (auto ptr = formatView.begin(); formatView.end() != ptr; ++ptr) {
  1555. if (*ptr != '%') {
  1556. ++dataSize;
  1557. continue;
  1558. }
  1559. if (dataSize) {
  1560. Printers_.emplace_back(TDataPrinter{std::string_view(&*dataStart, dataSize)});
  1561. ReservedSize_ += dataSize;
  1562. dataSize = 0U;
  1563. }
  1564. if (formatView.end() == ++ptr) {
  1565. ythrow yexception() << "format string ends with single %%";
  1566. }
  1567. switch (*ptr) {
  1568. case '%': {
  1569. static constexpr size_t size = 1;
  1570. Printers_.emplace_back([](char* out, const TUnboxedValuePod&, const IDateBuilder&) {
  1571. *out = '%';
  1572. return size;
  1573. });
  1574. ReservedSize_ += size;
  1575. break;
  1576. }
  1577. case 'Y': {
  1578. static constexpr size_t size = 4;
  1579. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1580. return PrintNDigits<size>::Do(GetYear(value), out);
  1581. });
  1582. ReservedSize_ += size;
  1583. break;
  1584. }
  1585. case 'm': {
  1586. static constexpr size_t size = 2;
  1587. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1588. return PrintNDigits<size>::Do(GetMonth(value), out);
  1589. });
  1590. ReservedSize_ += size;
  1591. break;
  1592. }
  1593. case 'd': {
  1594. static constexpr size_t size = 2;
  1595. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1596. return PrintNDigits<size>::Do(GetDay(value), out);
  1597. });
  1598. ReservedSize_ += size;
  1599. break;
  1600. }
  1601. case 'H': {
  1602. static constexpr size_t size = 2;
  1603. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1604. return PrintNDigits<size>::Do(GetHour(value), out);
  1605. });
  1606. ReservedSize_ += size;
  1607. break;
  1608. }
  1609. case 'M': {
  1610. static constexpr size_t size = 2;
  1611. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1612. return PrintNDigits<size>::Do(GetMinute(value), out);
  1613. });
  1614. ReservedSize_ += size;
  1615. break;
  1616. }
  1617. case 'S':
  1618. Printers_.emplace_back([alwaysWriteFractionalSeconds](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1619. constexpr size_t size = 2;
  1620. if (const auto microsecond = GetMicrosecond(value); microsecond || alwaysWriteFractionalSeconds) {
  1621. out += PrintNDigits<size>::Do(GetSecond(value), out);
  1622. *out++ = '.';
  1623. constexpr size_t msize = 6;
  1624. auto addSz = alwaysWriteFractionalSeconds ?
  1625. PrintNDigits<msize, true>::Do(microsecond, out) :
  1626. PrintNDigits<msize, false>::Do(microsecond, out);
  1627. return size + 1U + addSz;
  1628. }
  1629. return PrintNDigits<size>::Do(GetSecond(value), out);
  1630. });
  1631. ReservedSize_ += 9;
  1632. break;
  1633. case 'z': {
  1634. static constexpr size_t size = 5;
  1635. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder& builder) {
  1636. auto timezoneId = GetTimezoneId(value);
  1637. if (TTMStorage::IsUniversal(timezoneId)) {
  1638. std::memcpy(out, "+0000", size);
  1639. return size;
  1640. }
  1641. i32 shift;
  1642. if (!builder.GetTimezoneShift(GetYear(value), GetMonth(value), GetDay(value),
  1643. GetHour(value), GetMinute(value), GetSecond(value), timezoneId, shift))
  1644. {
  1645. std::memcpy(out, "+0000", size);
  1646. return size;
  1647. }
  1648. *out++ = shift > 0 ? '+' : '-';
  1649. shift = std::abs(shift);
  1650. out += PrintNDigits<2U>::Do(shift / 60U, out);
  1651. out += PrintNDigits<2U>::Do(shift % 60U, out);
  1652. return size;
  1653. });
  1654. ReservedSize_ += size;
  1655. break;
  1656. }
  1657. case 'Z':
  1658. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1659. const auto timezoneId = GetTimezoneId(value);
  1660. const auto tzName = NUdf::GetTimezones()[timezoneId];
  1661. std::memcpy(out, tzName.data(), std::min(tzName.size(), MAX_TIMEZONE_NAME_LEN));
  1662. return tzName.size();
  1663. });
  1664. ReservedSize_ += MAX_TIMEZONE_NAME_LEN;
  1665. break;
  1666. case 'b': {
  1667. static constexpr size_t size = 3;
  1668. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1669. static constexpr std::string_view mp[] {
  1670. "Jan",
  1671. "Feb",
  1672. "Mar",
  1673. "Apr",
  1674. "May",
  1675. "Jun",
  1676. "Jul",
  1677. "Aug",
  1678. "Sep",
  1679. "Oct",
  1680. "Nov",
  1681. "Dec"
  1682. };
  1683. auto month = GetMonth(value);
  1684. Y_ENSURE(month > 0 && month <= sizeof(mp) / sizeof(mp[0]), "Invalid month value");
  1685. std::memcpy(out, mp[month - 1].data(), size);
  1686. return size;
  1687. });
  1688. ReservedSize_ += size;
  1689. break;
  1690. }
  1691. case 'B': {
  1692. Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
  1693. static constexpr std::string_view mp[] {
  1694. "January",
  1695. "February",
  1696. "March",
  1697. "April",
  1698. "May",
  1699. "June",
  1700. "July",
  1701. "August",
  1702. "September",
  1703. "October",
  1704. "November",
  1705. "December"
  1706. };
  1707. auto month = GetMonth(value);
  1708. Y_ENSURE(month > 0 && month <= sizeof(mp) / sizeof(mp[0]), "Invalid month value");
  1709. const std::string_view monthFullName = mp[month - 1];
  1710. std::memcpy(out, monthFullName.data(), monthFullName.size());
  1711. return monthFullName.size();
  1712. });
  1713. ReservedSize_ += 9U; // MAX_MONTH_FULL_NAME_LEN
  1714. break;
  1715. }
  1716. default:
  1717. ythrow yexception() << "invalid format character: " << *ptr;
  1718. }
  1719. dataStart = ptr + 1U;
  1720. }
  1721. if (dataSize) {
  1722. Printers_.emplace_back(TDataPrinter{std::string_view(dataStart, dataSize)});
  1723. ReservedSize_ += dataSize;
  1724. }
  1725. }
  1726. private:
  1727. const TSourcePosition Pos_;
  1728. TUnboxedValue Format_;
  1729. TPrintersList Printers_{};
  1730. size_t ReservedSize_ = 0;
  1731. };
  1732. const TSourcePosition Pos_;
  1733. };
  1734. template<size_t Digits>
  1735. struct ParseExaclyNDigits;
  1736. template<>
  1737. struct ParseExaclyNDigits<0U> {
  1738. template <typename T>
  1739. static constexpr bool Do(std::string_view::const_iterator&, T&) {
  1740. return true;
  1741. }
  1742. };
  1743. template<size_t Digits>
  1744. struct ParseExaclyNDigits {
  1745. template <typename T>
  1746. static constexpr bool Do(std::string_view::const_iterator& it, T& out) {
  1747. const auto d = *it;
  1748. if (!std::isdigit(d)) {
  1749. return false;
  1750. }
  1751. out *= 10U;
  1752. out += d - '0';
  1753. return ParseExaclyNDigits<Digits - 1U>::Do(++it, out);
  1754. }
  1755. };
  1756. // Parse
  1757. class TParse : public TBoxedValue {
  1758. public:
  1759. class TFactory : public TBoxedValue {
  1760. public:
  1761. explicit TFactory(TSourcePosition pos)
  1762. : Pos_(pos)
  1763. {}
  1764. private:
  1765. TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try {
  1766. return TUnboxedValuePod(new TParse(args[0], Pos_));
  1767. } catch (const std::exception& e) {
  1768. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  1769. }
  1770. const TSourcePosition Pos_;
  1771. };
  1772. static const TStringRef& Name() {
  1773. static auto name = TStringRef::Of("Parse");
  1774. return name;
  1775. }
  1776. static bool DeclareSignature(
  1777. const TStringRef& name,
  1778. TType*,
  1779. IFunctionTypeInfoBuilder& builder,
  1780. bool typesOnly)
  1781. {
  1782. if (Name() != name) {
  1783. return false;
  1784. }
  1785. auto resourceType = builder.Resource(TMResourceName);
  1786. auto optionalResourceType = builder.Optional()->Item(resourceType).Build();
  1787. builder.Args()->Add<char*>().Flags(ICallablePayload::TArgumentFlags::AutoMap)
  1788. .Add(builder.Optional()->Item<ui16>())
  1789. .Done()
  1790. .OptionalArgs(1);
  1791. builder.RunConfig<char*>().Returns(optionalResourceType);
  1792. if (!typesOnly) {
  1793. builder.Implementation(new TParse::TFactory(builder.GetSourcePosition()));
  1794. }
  1795. return true;
  1796. }
  1797. private:
  1798. const TSourcePosition Pos_;
  1799. const TUnboxedValue Format_;
  1800. std::vector<std::function<bool(std::string_view::const_iterator& it, size_t, TUnboxedValuePod&, const IDateBuilder&)>> Scanners_;
  1801. struct TDataScanner {
  1802. const std::string_view Data_;
  1803. bool operator()(std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod&, const IDateBuilder&) const {
  1804. if (limit < Data_.size() || !std::equal(Data_.begin(), Data_.end(), it)) {
  1805. return false;
  1806. }
  1807. std::advance(it, Data_.size());
  1808. return true;
  1809. }
  1810. };
  1811. TUnboxedValue Run(
  1812. const IValueBuilder* valueBuilder,
  1813. const TUnboxedValuePod* args) const override
  1814. {
  1815. try {
  1816. EMPTY_RESULT_ON_EMPTY_ARG(0);
  1817. const std::string_view buffer = args[0].AsStringRef();
  1818. TUnboxedValuePod result(0);
  1819. auto& storage = Reference(result);
  1820. storage.MakeDefault();
  1821. auto& builder = valueBuilder->GetDateBuilder();
  1822. auto it = buffer.begin();
  1823. for (const auto& scanner : Scanners_) {
  1824. if (!scanner(it, std::distance(it, buffer.end()), result, builder)) {
  1825. return TUnboxedValuePod();
  1826. }
  1827. }
  1828. if (buffer.end() != it || !storage.Validate(builder)) {
  1829. return TUnboxedValuePod();
  1830. }
  1831. return result;
  1832. } catch (const std::exception& e) {
  1833. UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
  1834. }
  1835. }
  1836. TParse(const TUnboxedValuePod& runConfig, TSourcePosition pos)
  1837. : Pos_(pos)
  1838. , Format_(runConfig)
  1839. {
  1840. const std::string_view formatView(Format_.AsStringRef());
  1841. auto dataStart = formatView.begin();
  1842. size_t dataSize = 0U;
  1843. for (auto ptr = formatView.begin(); formatView.end() != ptr; ++ptr) {
  1844. if (*ptr != '%') {
  1845. ++dataSize;
  1846. continue;
  1847. }
  1848. if (dataSize) {
  1849. Scanners_.emplace_back(TDataScanner{std::string_view(&*dataStart, dataSize)});
  1850. dataSize = 0;
  1851. }
  1852. if (++ptr == formatView.end()) {
  1853. ythrow yexception() << "format string ends with single %%";
  1854. }
  1855. switch (*ptr) {
  1856. case '%':
  1857. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod&, const IDateBuilder&) {
  1858. return limit > 0U && *it++ == '%';
  1859. });
  1860. break;
  1861. case 'Y': {
  1862. static constexpr size_t size = 4;
  1863. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  1864. ui32 year = 0U;
  1865. if (limit < size || !ParseExaclyNDigits<size>::Do(it, year) || !ValidateYear(year)) {
  1866. return false;
  1867. }
  1868. SetYear(result, year);
  1869. return true;
  1870. });
  1871. break;
  1872. }
  1873. case 'm': {
  1874. static constexpr size_t size = 2;
  1875. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  1876. ui32 month = 0U;
  1877. if (limit < size || !ParseExaclyNDigits<size>::Do(it, month) || !ValidateMonth(month)) {
  1878. return false;
  1879. }
  1880. SetMonth(result, month);
  1881. return true;
  1882. });
  1883. break;
  1884. }
  1885. case 'd': {
  1886. static constexpr size_t size = 2;
  1887. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  1888. ui32 day = 0U;
  1889. if (limit < size || !ParseExaclyNDigits<size>::Do(it, day) || !ValidateDay(day)) {
  1890. return false;
  1891. }
  1892. SetDay(result, day);
  1893. return true;
  1894. });
  1895. break;
  1896. }
  1897. case 'H': {
  1898. static constexpr size_t size = 2;
  1899. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  1900. ui32 hour = 0U;
  1901. if (limit < size || !ParseExaclyNDigits<size>::Do(it, hour) || !ValidateHour(hour)) {
  1902. return false;
  1903. }
  1904. SetHour(result, hour);
  1905. return true;
  1906. });
  1907. break;
  1908. }
  1909. case 'M': {
  1910. static constexpr size_t size = 2;
  1911. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  1912. ui32 minute = 0U;
  1913. if (limit < size || !ParseExaclyNDigits<size>::Do(it, minute) || !ValidateMinute(minute)) {
  1914. return false;
  1915. }
  1916. SetMinute(result, minute);
  1917. return true;
  1918. });
  1919. break;
  1920. }
  1921. case 'S': {
  1922. static constexpr size_t size = 2;
  1923. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  1924. ui32 second = 0U;
  1925. if (limit < size || !ParseExaclyNDigits<size>::Do(it, second) || !ValidateSecond(second)) {
  1926. return false;
  1927. }
  1928. SetSecond(result, second);
  1929. limit -= size;
  1930. if (!limit || *it != '.') {
  1931. return true;
  1932. }
  1933. ++it;
  1934. --limit;
  1935. ui32 usec = 0U;
  1936. size_t digits = 6U;
  1937. for (; limit; --limit) {
  1938. const auto c = *it;
  1939. if (!digits || !std::isdigit(c)) {
  1940. break;
  1941. }
  1942. usec *= 10U;
  1943. usec += c - '0';
  1944. ++it;
  1945. --digits;
  1946. }
  1947. for (; !digits && limit && std::isdigit(*it); --limit, ++it);
  1948. while (digits--) {
  1949. usec *= 10U;
  1950. }
  1951. SetMicrosecond(result, usec);
  1952. return true;
  1953. });
  1954. break;
  1955. }
  1956. case 'Z':
  1957. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder& builder) {
  1958. const auto start = it;
  1959. while (limit > 0 && (std::isalnum(*it) || *it == '/' || *it == '_' || *it == '-' || *it == '+')) {
  1960. ++it;
  1961. --limit;
  1962. }
  1963. const auto size = std::distance(start, it);
  1964. ui32 timezoneId;
  1965. if (!builder.FindTimezoneId(TStringRef(&*start, size), timezoneId)) {
  1966. return false;
  1967. }
  1968. SetTimezoneId(result, timezoneId);
  1969. return true;
  1970. });
  1971. break;
  1972. case 'b': {
  1973. static constexpr size_t size = 3;
  1974. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  1975. const auto start = it;
  1976. size_t cnt = 0U;
  1977. while (limit > 0 && cnt < size && std::isalpha(*it)) {
  1978. ++it;
  1979. ++cnt;
  1980. --limit;
  1981. }
  1982. const std::string_view monthName{start, cnt};
  1983. ui8 month = 0U;
  1984. if (cnt < size || !ValidateMonthShortName(monthName, month)) {
  1985. return false;
  1986. }
  1987. SetMonth(result, month);
  1988. return true;
  1989. });
  1990. break;
  1991. }
  1992. case 'B': {
  1993. Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
  1994. const auto start = it;
  1995. size_t cnt = 0U;
  1996. while (limit > 0 && std::isalpha(*it)) {
  1997. ++it;
  1998. ++cnt;
  1999. --limit;
  2000. }
  2001. const std::string_view monthName{start, cnt};
  2002. ui8 month = 0U;
  2003. if (!ValidateMonthFullName(monthName, month)) {
  2004. return false;
  2005. }
  2006. SetMonth(result, month);
  2007. return true;
  2008. });
  2009. break;
  2010. }
  2011. default:
  2012. ythrow yexception() << "invalid format character: " << *ptr;
  2013. }
  2014. dataStart = ptr + 1U;
  2015. }
  2016. if (dataSize) {
  2017. Scanners_.emplace_back(TDataScanner{std::string_view(&*dataStart, dataSize)});
  2018. }
  2019. }
  2020. };
  2021. #define PARSE_SPECIFIC_FORMAT(format) \
  2022. SIMPLE_STRICT_UDF(TParse##format, TOptional<TResource<TMResourceName>>(TAutoMap<char*>)) { \
  2023. auto str = args[0].AsStringRef(); \
  2024. TInstant instant; \
  2025. if (!TInstant::TryParse##format(TStringBuf(str.Data(), str.Size()), instant) || instant.Seconds() >= NUdf::MAX_DATETIME) { \
  2026. return TUnboxedValuePod(); \
  2027. } \
  2028. auto& builder = valueBuilder->GetDateBuilder(); \
  2029. TUnboxedValuePod result(0); \
  2030. auto& storage = Reference(result); \
  2031. storage.FromTimestamp(builder, instant.MicroSeconds()); \
  2032. return result; \
  2033. }
  2034. PARSE_SPECIFIC_FORMAT(Rfc822);
  2035. PARSE_SPECIFIC_FORMAT(Iso8601);
  2036. PARSE_SPECIFIC_FORMAT(Http);
  2037. PARSE_SPECIFIC_FORMAT(X509);
// Defines TDateTime2Module: the list of every UDF implementation exported by this file.
// The entry order is kept exactly as written.
SIMPLE_MODULE(TDateTime2Module,
    // Split: one factory instantiated over all supported date/time input types.
    TUserDataTypeFuncFactory<true, true, SplitName, TSplit,
        TDate,
        TDatetime,
        TTimestamp,
        TTzDate,
        TTzDatetime,
        TTzTimestamp,
        TDate32,
        TDatetime64,
        TTimestamp64>,

    // Make*: conversions from the TM resource back to date/time values.
    TMakeDate,
    TMakeDatetime,
    TMakeTimestamp,
    TMakeTzDate,
    TMakeTzDatetime,
    TMakeTzTimestamp,

    TConvert,

    TMakeDate32,
    TMakeDatetime64,
    TMakeTimestamp64,

    // Get*: component accessors.
    TGetYear,
    TGetDayOfYear,
    TGetMonth,
    TGetMonthName,
    TGetWeekOfYear,
    TGetWeekOfYearIso8601,
    TGetDayOfMonth,
    TGetDayOfWeek,
    TGetDayOfWeekName,
    TGetTimeComponent<GetHourName, ui8, GetHour, 1u, 3600u, 24u, false>,
    TGetTimeComponent<GetMinuteName, ui8, GetMinute, 1u, 60u, 60u, false>,
    TGetTimeComponent<GetSecondName, ui8, GetSecond, 1u, 1u, 60u, false>,
    TGetTimeComponent<GetMillisecondOfSecondName, ui32, GetMicrosecond, 1000u, 1000u, 1000u, true>,
    TGetTimeComponent<GetMicrosecondOfSecondName, ui32, GetMicrosecond, 1u, 1u, 1000000u, true>,
    TGetTimezoneId,
    TGetTimezoneName,

    TUpdate,

    // From* / IntervalFrom*: constructors from numeric values.
    TFromSeconds,
    TFromMilliseconds,
    TFromMicroseconds,

    TIntervalFromDays,
    TIntervalFromHours,
    TIntervalFromMinutes,
    TIntervalFromSeconds,
    TIntervalFromMilliseconds,
    TIntervalFromMicroseconds,

    TToDays,
    TToHours,
    TToMinutes,

    // StartOf* family.
    TStartOfYear,
    TStartOfQuarter,
    TStartOfMonth,
    TStartOfWeek,
    TStartOfDay,
    TStartOf,
    TTimeOfDay,

    // Shift* family.
    TShiftYears,
    TShiftQuarters,
    TShiftMonths,

    // EndOf* family.
    TEndOfYear,
    TEndOfQuarter,
    TEndOfMonth,
    TEndOfWeek,
    TEndOfDay,

    TToUnits<ToSecondsName, ui32, 1>,
    TToUnits<ToMillisecondsName, ui64, 1000>,
    TToUnits<ToMicrosecondsName, ui64, 1000000>,

    // Formatting and parsing.
    TFormat,
    TParse,

    TParseRfc822,
    TParseIso8601,
    TParseHttp,
    TParseX509
)
  2113. }
// Registers TDateTime2Module through the REGISTER_MODULES macro.
REGISTER_MODULES(TDateTime2Module)