common.h 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307
  1. #pragma once
  2. ///
  3. /// @file yt/cpp/mapreduce/interface/common.h
  4. ///
  5. /// Header containing miscellaneous structs and classes used in library.
  6. #include "fwd.h"
  7. #include <library/cpp/type_info/type_info.h>
  8. #include <library/cpp/yson/node/node.h>
  9. #include <util/generic/guid.h>
  10. #include <util/generic/map.h>
  11. #include <util/generic/maybe.h>
  12. #include <util/generic/ptr.h>
  13. #include <util/system/type_name.h>
  14. #include <util/generic/vector.h>
  15. #include <google/protobuf/message.h>
  16. #include <initializer_list>
  17. #include <type_traits>
  18. namespace NYT {
  19. ////////////////////////////////////////////////////////////////////////////////
  20. /// @cond Doxygen_Suppress
  // Declares a public data member `name_` of the given type plus a chainable
  // setter `name(value)` that copies the value in and returns the enclosing
  // object as `TSelf&`. The enclosing class must define `TSelf`.
  // The trailing `static_assert(true)` makes every use end with a semicolon.
  #define FLUENT_FIELD(type, name) \
      type name##_; \
      TSelf& name(const type& value) \
      { \
          name##_ = value; \
          return *static_cast<TSelf*>(this); \
      } \
      static_assert(true)
  // Declares a private member `name_` with fluent accessors:
  //   - `name(value) &`   copies the value in and returns `TSelf&` for chaining;
  //   - `name(value) &&`  does the same on a temporary and returns it by value,
  //                       moving the temporary out instead of copying it;
  //   - `name() const &`  returns a const reference to the stored value;
  //   - `name() &&`       moves the stored value out of a temporary.
  // The enclosing class must define `TSelf`. Access is `public` after the macro.
  #define FLUENT_FIELD_ENCAPSULATED(type, name) \
  private: \
      type name##_; \
  public: \
      TSelf& name(const type& value) & \
      { \
          name##_ = value; \
          return static_cast<TSelf&>(*this); \
      } \
      TSelf name(const type& value) && \
      { \
          name##_ = value; \
          return static_cast<TSelf&&>(*this); \
      } \
      const type& name() const & \
      { \
          return name##_; \
      } \
      type name() && \
      { \
          return std::move(name##_); \
      } \
      static_assert(true)
  // Declares a public optional member `TMaybe<type> name_` (initially unset as
  // far as this macro is concerned) plus a chainable setter `name(value)` that
  // stores a copy of the value and returns the enclosing object as `TSelf&`.
  // The enclosing class must define `TSelf`.
  #define FLUENT_FIELD_OPTION(type, name) \
      TMaybe<type> name##_; \
      TSelf& name(const type& value) \
      { \
          name##_ = value; \
          return *static_cast<TSelf*>(this); \
      } \
      static_assert(true)
  // Declares a private optional member `TMaybe<type> name_` with fluent accessors:
  //   - `name(value) &` / `name(value) &&`  store a copy of the value;
  //   - `Reset<name>() &` / `Reset<name>() &&`  clear the stored value;
  //   - `name() const &`  returns a const reference to the optional;
  //   - `name() &&`       moves the optional out of a temporary.
  // The `&` overloads return `TSelf&`; the `&&` overloads return the temporary
  // by value and move it out instead of copying it.
  // The enclosing class must define `TSelf`. Access is `public` after the macro.
  #define FLUENT_FIELD_OPTION_ENCAPSULATED(type, name) \
  private: \
      TMaybe<type> name##_; \
  public: \
      TSelf& name(const type& value) & \
      { \
          name##_ = value; \
          return static_cast<TSelf&>(*this); \
      } \
      TSelf name(const type& value) && \
      { \
          name##_ = value; \
          return static_cast<TSelf&&>(*this); \
      } \
      TSelf& Reset##name() & \
      { \
          name##_ = Nothing(); \
          return static_cast<TSelf&>(*this); \
      } \
      TSelf Reset##name() && \
      { \
          name##_ = Nothing(); \
          return static_cast<TSelf&&>(*this); \
      } \
      const TMaybe<type>& name() const & \
      { \
          return name##_; \
      } \
      TMaybe<type> name() && \
      { \
          return std::move(name##_); \
      } \
      static_assert(true)
  // Declares a public data member `name_` initialized with `defaultValue`, plus
  // a chainable setter `name(value)` that copies the value in and returns the
  // enclosing object as `TSelf&`. The enclosing class must define `TSelf`.
  #define FLUENT_FIELD_DEFAULT(type, name, defaultValue) \
      type name##_ = defaultValue; \
      TSelf& name(const type& value) \
      { \
          name##_ = value; \
          return *static_cast<TSelf*>(this); \
      } \
      static_assert(true)
  // Declares a private member `name_` initialized with `defaultValue`, with
  // fluent accessors:
  //   - `name(value) &`   copies the value in and returns `TSelf&` for chaining;
  //   - `name(value) &&`  does the same on a temporary and returns it by value,
  //                       moving the temporary out instead of copying it;
  //   - `name() const &`  returns a const reference to the stored value;
  //   - `name() &&`       moves the stored value out of a temporary.
  // The enclosing class must define `TSelf`. Access is `public` after the macro.
  #define FLUENT_FIELD_DEFAULT_ENCAPSULATED(type, name, defaultValue) \
  private: \
      type name##_ = defaultValue; \
  public: \
      TSelf& name(const type& value) & \
      { \
          name##_ = value; \
          return static_cast<TSelf&>(*this); \
      } \
      TSelf name(const type& value) && \
      { \
          name##_ = value; \
          return static_cast<TSelf&&>(*this); \
      } \
      const type& name() const & \
      { \
          return name##_; \
      } \
      type name() && \
      { \
          return std::move(name##_); \
      } \
      static_assert(true)
  // Declares a public member `TVector<type> <name>s_` with two chainable
  // modifiers, both returning the enclosing object as `TSelf&`:
  //   - `Add<name>(value)`  appends a copy of the value;
  //   - `<name>s(values)`   replaces the whole vector (taken by value, moved in).
  // The enclosing class must define `TSelf`.
  #define FLUENT_VECTOR_FIELD(type, name) \
      TVector<type> name##s_; \
      TSelf& Add##name(const type& value) \
      { \
          name##s_.push_back(value); \
          return *static_cast<TSelf*>(this); \
      } \
      TSelf& name##s(TVector<type> values) \
      { \
          name##s_ = std::move(values); \
          return *static_cast<TSelf*>(this); \
      } \
      static_assert(true)
  // Declares a private member `TMaybe<TVector<type>> <name>s_` with fluent accessors:
  //   - `<name>s()`           getters: const reference, mutable reference, or
  //                           moved-out value when called on a temporary;
  //   - `Add<name>(value)`    appends a copy of the value, creating the vector
  //                           first if the optional is empty;
  //   - `<name>s(values)`     replaces the whole vector;
  //   - `<name>s(TNothing)` / `Reset<name>s()`  clear the optional.
  // Every modifier has an `&` overload returning `TSelf&` and an `&&` overload
  // that applies the same change and returns the temporary by value (moved).
  // Inside an `&&` member `*this` is an lvalue, so the `&&` overloads simply
  // delegate to their `&` counterparts and move the result out.
  // The enclosing class must define `TSelf`. Access is `public` after the macro.
  #define FLUENT_OPTIONAL_VECTOR_FIELD_ENCAPSULATED(type, name) \
  private: \
      TMaybe<TVector<type>> name##s_; \
  public: \
      const TMaybe<TVector<type>>& name##s() const & \
      { \
          return name##s_; \
      } \
      TMaybe<TVector<type>>& name##s() & \
      { \
          return name##s_; \
      } \
      TMaybe<TVector<type>> name##s() && \
      { \
          return std::move(name##s_); \
      } \
      TSelf& Add##name(const type& value) & \
      { \
          if (name##s_.Empty()) { \
              name##s_.ConstructInPlace(); \
          } \
          name##s_->push_back(value); \
          return *static_cast<TSelf*>(this); \
      } \
      TSelf Add##name(const type& value) && \
      { \
          return std::move(Add##name(value)); \
      } \
      TSelf& name##s(TVector<type> values) & \
      { \
          name##s_ = std::move(values); \
          return *static_cast<TSelf*>(this); \
      } \
      TSelf name##s(TVector<type> values) && \
      { \
          return std::move(name##s(std::move(values))); \
      } \
      TSelf& name##s(TNothing) & \
      { \
          name##s_ = Nothing(); \
          return *static_cast<TSelf*>(this); \
      } \
      TSelf name##s(TNothing) && \
      { \
          return std::move(name##s(Nothing())); \
      } \
      TSelf& Reset##name##s() & \
      { \
          name##s_ = Nothing(); \
          return *static_cast<TSelf*>(this); \
      } \
      TSelf Reset##name##s() && \
      { \
          return std::move(Reset##name##s()); \
      } \
      static_assert(true)
  // Declares a private member `TVector<type> <name>s_` with fluent accessors:
  //   - `Add<name>(value)`    appends a copy of the value;
  //   - `<name>s(value)`      replaces the whole vector (taken by value, moved in);
  //   - `<name>s() const &`   returns a const reference to the vector;
  //   - `<name>s() &&`        moves the vector out of a temporary.
  // The `&` modifiers return `TSelf&`; the `&&` modifiers return the temporary
  // by value and move it out, so chained calls on a temporary do not copy the
  // accumulated vector at every step.
  // The enclosing class must define `TSelf`. Access is `public` after the macro.
  #define FLUENT_VECTOR_FIELD_ENCAPSULATED(type, name) \
  private: \
      TVector<type> name##s_; \
  public: \
      TSelf& Add##name(const type& value) & \
      { \
          name##s_.push_back(value); \
          return static_cast<TSelf&>(*this); \
      } \
      TSelf Add##name(const type& value) && \
      { \
          name##s_.push_back(value); \
          return static_cast<TSelf&&>(*this); \
      } \
      TSelf& name##s(TVector<type> value) & \
      { \
          name##s_ = std::move(value); \
          return static_cast<TSelf&>(*this); \
      } \
      TSelf name##s(TVector<type> value) && \
      { \
          name##s_ = std::move(value); \
          return static_cast<TSelf&&>(*this); \
      } \
      const TVector<type>& name##s() const & \
      { \
          return name##s_; \
      } \
      TVector<type> name##s() && \
      { \
          return std::move(name##s_); \
      } \
      static_assert(true)
  // Declares a public member `TMap<keytype, valuetype> name_` plus a chainable
  // `Add<name>(key, value)` that emplaces the pair and returns the enclosing
  // object as `TSelf&`. The insertion goes through `emplace`, so a key that is
  // already present keeps its existing value.
  // The enclosing class must define `TSelf`.
  #define FLUENT_MAP_FIELD(keytype, valuetype, name) \
      TMap<keytype,valuetype> name##_; \
      TSelf& Add##name(const keytype& key, const valuetype& value) \
      { \
          name##_.emplace(key, value); \
          return *static_cast<TSelf*>(this); \
      } \
      static_assert(true)
  238. /// @endcond
  239. ////////////////////////////////////////////////////////////////////////////////
  240. ///
  241. /// @brief Convenience class that keeps sequence of items.
  242. ///
  243. /// Designed to be used as function parameter.
  244. ///
  245. /// Users of such function can then pass:
  246. /// - single item,
  247. /// - initializer list of items,
  248. /// - vector of items;
  249. /// as argument to this function.
  250. ///
  251. /// Example:
  252. /// ```
  253. /// void Foo(const TOneOrMany<int>& arg);
  254. /// ...
  255. /// Foo(1); // ok
  256. /// Foo({1, 2, 3}); // ok
  257. /// ```
  258. template <class T, class TDerived>
  259. struct TOneOrMany
  260. {
  261. /// @cond Doxygen_Suppress
  262. using TSelf = std::conditional_t<std::is_void_v<TDerived>, TOneOrMany, TDerived>;
  263. /// @endcond
  264. /// Initialize with empty sequence.
  265. TOneOrMany() = default;
  266. // Initialize from initializer list.
  267. template<class U>
  268. TOneOrMany(std::initializer_list<U> il)
  269. {
  270. Parts_.assign(il.begin(), il.end());
  271. }
  272. /// Put arguments to sequence
  273. template <class U, class... TArgs>
  274. requires std::is_convertible_v<U, T>
  275. TOneOrMany(U&& arg, TArgs&&... args)
  276. {
  277. Add(arg, std::forward<TArgs>(args)...);
  278. }
  279. /// Initialize from vector.
  280. TOneOrMany(TVector<T> args)
  281. : Parts_(std::move(args))
  282. { }
  283. /// @brief Order is defined the same way as in TVector
  284. bool operator==(const TOneOrMany& rhs) const
  285. {
  286. // N.B. We would like to make this method to be `= default`,
  287. // but this breaks MSVC compiler for the cases when T doesn't
  288. // support comparison.
  289. return Parts_ == rhs.Parts_;
  290. }
  291. ///
  292. /// @{
  293. ///
  294. /// @brief Add all arguments to sequence
  295. template <class U, class... TArgs>
  296. requires std::is_convertible_v<U, T>
  297. TSelf& Add(U&& part, TArgs&&... args) &
  298. {
  299. Parts_.push_back(std::forward<U>(part));
  300. if constexpr (sizeof...(args) > 0) {
  301. [[maybe_unused]] int dummy[sizeof...(args)] = {(Parts_.push_back(std::forward<TArgs>(args)), 0) ... };
  302. }
  303. return static_cast<TSelf&>(*this);
  304. }
  305. template <class U, class... TArgs>
  306. requires std::is_convertible_v<U, T>
  307. TSelf Add(U&& part, TArgs&&... args) &&
  308. {
  309. return std::move(Add(std::forward<U>(part), std::forward<TArgs>(args)...));
  310. }
  311. /// @}
  312. /// Content of sequence.
  313. TVector<T> Parts_;
  314. };
  315. ////////////////////////////////////////////////////////////////////////////////
  ///
  /// @brief Type of the value that can occur in YT table.
  ///
  /// @ref NYT::TTableSchema
  /// https://ytsaurus.tech/docs/en/user-guide/storage/data-types
  enum EValueType : int
  {
      /// Int64, signed integer of 64 bits.
      VT_INT64,
      /// Uint64, unsigned integer of 64 bits.
      VT_UINT64,
      /// Double, floating point number of double precision (64 bits).
      VT_DOUBLE,
      /// Boolean, `true` or `false`.
      VT_BOOLEAN,
      /// String, arbitrary byte sequence.
      VT_STRING,
      /// Any, arbitrary yson document.
      VT_ANY,
      /// Int8, signed integer of 8 bits.
      VT_INT8,
      /// Int16, signed integer of 16 bits.
      VT_INT16,
      /// Int32, signed integer of 32 bits.
      VT_INT32,
      /// Uint8, unsigned integer of 8 bits.
      VT_UINT8,
      /// Uint16, unsigned integer of 16 bits.
      VT_UINT16,
      /// Uint32, unsigned integer of 32 bits.
      VT_UINT32,
      /// Utf8, byte sequence that is valid utf8.
      VT_UTF8,
      /// Null, absence of value (almost never used in schemas)
      VT_NULL,
      /// Void, absence of value (almost never used in schemas);
      /// the difference between null and void is yql-specific.
      VT_VOID,
      /// Date, number of days since Unix epoch (unsigned)
      VT_DATE,
      /// Datetime, number of seconds since Unix epoch (unsigned)
      VT_DATETIME,
      /// Timestamp, number of microseconds since Unix epoch (unsigned)
      VT_TIMESTAMP,
      /// Interval, difference between two timestamps (signed)
      VT_INTERVAL,
      /// Float, floating point number (32 bits)
      VT_FLOAT,
      /// Json, sequence of bytes that is valid json.
      VT_JSON,
  };
  ///
  /// @brief Order in which a column is sorted.
  ///
  /// @ref NYT::TTableSchema
  enum ESortOrder : int
  {
      /// Values go from smaller to larger.
      SO_ASCENDING /* "ascending" */,

      /// Values go from larger to smaller.
      SO_DESCENDING /* "descending" */,
  };
  377. ///
  378. /// @brief Value of "optimize_for" attribute.
  379. ///
  380. /// @ref NYT::TRichYPath
  381. enum EOptimizeForAttr : i8
  382. {
  383. /// Optimize for scan
  384. OF_SCAN_ATTR /* "scan" */,
  385. /// Optimize for lookup
  386. OF_LOOKUP_ATTR /* "lookup" */,
  387. };
  388. ///
  389. /// @brief Value of "erasure_codec" attribute.
  390. ///
  391. /// @ref NYT::TRichYPath
  392. enum EErasureCodecAttr : i8
  393. {
  394. /// @cond Doxygen_Suppress
  395. EC_NONE_ATTR /* "none" */,
  396. EC_REED_SOLOMON_6_3_ATTR /* "reed_solomon_6_3" */,
  397. EC_LRC_12_2_2_ATTR /* "lrc_12_2_2" */,
  398. EC_ISA_LRC_12_2_2_ATTR /* "isa_lrc_12_2_2" */,
  399. /// @endcond
  400. };
  401. ///
  402. /// @brief Value of "schema_modification" attribute.
  403. ///
  404. /// @ref NYT::TRichYPath
  405. enum ESchemaModificationAttr : i8
  406. {
  407. SM_NONE_ATTR /* "none" */,
  408. SM_UNVERSIONED_UPDATE /* "unversioned_update" */,
  409. };
  410. ////////////////////////////////////////////////////////////////////////////////
  411. ///
  412. /// @brief Table key column description.
  413. ///
  414. /// The description includes column name and sort order.
  415. ///
  416. /// @anchor TSortOrder_backward_compatibility
  417. /// @note
  418. /// Many functions that use `TSortOrder` as argument used to take `TString`
  419. /// (the only allowed sort order was "ascending" and user didn't have to specify it).
  420. /// @note
  421. /// This class is designed to provide backward compatibility for such code and therefore
  422. /// objects of this class can be constructed and assigned from TString-like objects only.
  423. ///
  424. /// @see NYT::TSortOperationSpec
  425. class TSortColumn
  426. {
  427. public:
  428. /// @cond Doxygen_Suppress
  429. using TSelf = TSortColumn;
  430. /// @endcond
  431. /// Column name
  432. FLUENT_FIELD_ENCAPSULATED(TString, Name);
  433. /// Sort order
  434. FLUENT_FIELD_DEFAULT_ENCAPSULATED(ESortOrder, SortOrder, ESortOrder::SO_ASCENDING);
  435. ///
  436. /// @{
  437. ///
  438. /// @brief Construct object from name and sort order
  439. ///
  440. /// Constructors are intentionally implicit so `TSortColumn` can be compatible with old code.
  441. /// @ref TSortOrder_backward_compatibility
  442. TSortColumn(TStringBuf name = {}, ESortOrder sortOrder = ESortOrder::SO_ASCENDING);
  443. TSortColumn(const TString& name, ESortOrder sortOrder = ESortOrder::SO_ASCENDING);
  444. TSortColumn(const char* name, ESortOrder sortOrder = ESortOrder::SO_ASCENDING);
  445. /// @}
  446. /// Check that sort order is ascending, throw exception otherwise.
  447. const TSortColumn& EnsureAscending() const;
  448. /// @brief Convert sort to yson representation as YT API expects it.
  449. TNode ToNode() const;
  450. /// @brief Comparison is default and checks both name and sort order.
  451. bool operator == (const TSortColumn& rhs) const = default;
  452. ///
  453. /// @{
  454. ///
  455. /// @brief Assign object from column name, and set sort order to `ascending`.
  456. ///
  457. /// This is backward compatibility methods.
  458. ///
  459. /// @ref TSortOrder_backward_compatibility
  460. TSortColumn& operator = (TStringBuf name);
  461. TSortColumn& operator = (const TString& name);
  462. TSortColumn& operator = (const char* name);
  463. /// @}
  464. bool operator == (const TStringBuf rhsName) const;
  465. bool operator == (const TString& rhsName) const;
  466. bool operator == (const char* rhsName) const;
  467. // Intentionally implicit conversions.
  468. operator TString() const;
  469. operator TStringBuf() const;
  470. operator std::string() const;
  471. Y_SAVELOAD_DEFINE(Name_, SortOrder_);
  472. };
  473. ///
  474. /// @brief List of @ref TSortColumn
  475. ///
  476. /// Contains a bunch of helper methods such as constructing from single object.
  477. class TSortColumns
  478. : public TOneOrMany<TSortColumn, TSortColumns>
  479. {
  480. public:
  481. using TOneOrMany<TSortColumn, TSortColumns>::TOneOrMany;
  482. /// Construct empty list.
  483. TSortColumns();
  484. ///
  485. /// @{
  486. ///
  487. /// @brief Construct list of ascending sort order columns by their names.
  488. ///
  489. /// Required for backward compatibility.
  490. ///
  491. /// @ref TSortOrder_backward_compatibility
  492. TSortColumns(const TVector<TString>& names);
  493. TSortColumns(const TColumnNames& names);
  494. /// @}
  495. ///
  496. /// @brief Implicit conversion to column list.
  497. ///
  498. /// If all columns has ascending sort order return list of their names.
  499. /// Throw exception otherwise.
  500. ///
  501. /// Required for backward compatibility.
  502. ///
  503. /// @ref TSortOrder_backward_compatibility
  504. operator TColumnNames() const;
  505. /// Make sure that all columns are of ascending sort order.
  506. const TSortColumns& EnsureAscending() const;
  507. /// Get list of column names.
  508. TVector<TString> GetNames() const;
  509. };
  510. ////////////////////////////////////////////////////////////////////////////////
  511. /// Helper function to create new style type from old style one.
  512. NTi::TTypePtr ToTypeV3(EValueType type, bool required);
  513. ///
  514. /// @brief Single column description
  515. ///
  516. /// Each field describing column has setter and getter.
  517. ///
  518. /// Example reading field:
  519. /// ```
  520. /// ... columnSchema.Name() ...
  521. /// ```
  522. ///
  523. /// Example setting field:
  524. /// ```
  525. /// columnSchema.Name("my-column").Type(VT_INT64); // set name and type
  526. /// ```
  527. ///
  528. /// @ref https://ytsaurus.tech/docs/en/user-guide/storage/static-schema
  529. class TColumnSchema
  530. {
  531. public:
  532. /// @cond Doxygen_Suppress
  533. using TSelf = TColumnSchema;
  534. /// @endcond
  535. ///
  536. /// @brief Construct empty column schemas
  537. ///
  538. /// @note
  539. /// Such schema cannot be used in schema as it it doesn't have name.
  540. TColumnSchema();
  541. ///
  542. /// @{
  543. ///
  544. /// @brief Copy and move constructors are default.
  545. TColumnSchema(const TColumnSchema&) = default;
  546. TColumnSchema& operator=(const TColumnSchema&) = default;
  547. /// @}
  548. FLUENT_FIELD_ENCAPSULATED(TString, Name);
  549. ///
  550. /// @brief Functions to work with type in old manner.
  551. ///
  552. /// @deprecated New code is recommended to work with types using @ref NTi::TTypePtr from type_info library.
  553. TColumnSchema& Type(EValueType type) &;
  554. TColumnSchema Type(EValueType type) &&;
  555. EValueType Type() const;
  556. /// @brief Set and get column type.
  557. /// @{
  558. TColumnSchema& Type(const NTi::TTypePtr& type) &;
  559. TColumnSchema Type(const NTi::TTypePtr& type) &&;
  560. TColumnSchema& TypeV3(const NTi::TTypePtr& type) &;
  561. TColumnSchema TypeV3(const NTi::TTypePtr& type) &&;
  562. NTi::TTypePtr TypeV3() const;
  563. /// @}
  564. ///
  565. /// @brief Raw yson representation of column type
  566. /// @deprecated Prefer to use `TypeV3` methods.
  567. FLUENT_FIELD_OPTION_ENCAPSULATED(TNode, RawTypeV3);
  568. /// Column sort order
  569. FLUENT_FIELD_OPTION_ENCAPSULATED(ESortOrder, SortOrder);
  570. ///
  571. /// @brief Lock group name
  572. ///
  573. /// @ref https://ytsaurus.tech/docs/en/user-guide/dynamic-tables/sorted-dynamic-tables#locking-rows
  574. FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Lock);
  575. /// Expression defining column value
  576. FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Expression);
  577. /// Aggregating function name
  578. FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Aggregate);
  579. ///
  580. /// @brief Storage group name
  581. ///
  582. /// @ref https://ytsaurus.tech/docs/en/user-guide/storage/static-schema
  583. FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Group);
  584. // StableName for renamed and deleted columns.
  585. FLUENT_FIELD_OPTION_ENCAPSULATED(TString, StableName);
  586. /// Deleted column
  587. FLUENT_FIELD_OPTION_ENCAPSULATED(bool, Deleted);
  588. ///
  589. /// @brief Column requiredness.
  590. ///
  591. /// Required columns doesn't accept NULL values.
  592. /// Usually if column is required it means that it has Optional<...> type
  593. bool Required() const;
  594. ///
  595. /// @{
  596. ///
  597. /// @brief Set type in old-style manner
  598. TColumnSchema& Type(EValueType type, bool required) &;
  599. TColumnSchema Type(EValueType type, bool required) &&;
  600. /// @}
  601. private:
  602. friend void Deserialize(TColumnSchema& columnSchema, const TNode& node);
  603. NTi::TTypePtr TypeV3_;
  604. bool Required_ = false;
  605. };
  606. /// Equality check checks all fields of column schema.
  607. bool operator==(const TColumnSchema& lhs, const TColumnSchema& rhs);
  608. ///
  609. /// @brief Description of table schema
  610. ///
  611. /// @see https://ytsaurus.tech/docs/en/user-guide/storage/static-schema
  612. class TTableSchema
  613. {
  614. public:
  615. /// @cond Doxygen_Suppress
  616. using TSelf = TTableSchema;
  617. /// @endcond
  618. /// Column schema
  619. FLUENT_VECTOR_FIELD_ENCAPSULATED(TColumnSchema, Column);
  620. ///
  621. /// @brief Strictness of the schema
  622. ///
  623. /// Strict schemas are not allowed to have columns not described in schema.
  624. /// Nonstrict schemas are allowed to have such columns, all such missing columns are assumed to have
  625. FLUENT_FIELD_DEFAULT_ENCAPSULATED(bool, Strict, true);
  626. ///
  627. /// @brief Whether keys are unique
  628. ///
  629. /// This flag can be set only for schemas that have sorted columns.
  630. /// If flag is set table cannot have multiple rows with same key.
  631. FLUENT_FIELD_DEFAULT_ENCAPSULATED(bool, UniqueKeys, false);
  632. /// Get modifiable column list
  633. TVector<TColumnSchema>& MutableColumns();
  634. /// Check if schema has any described column
  635. [[nodiscard]] bool Empty() const;
  636. /// Add column
  637. TTableSchema& AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) &;
  638. /// @copydoc NYT::TTableSchema::AddColumn(const TString&, const NTi::TTypePtr&, ESortOrder)&;
  639. TTableSchema AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) &&;
  640. /// @copydoc NYT::TTableSchema::AddColumn(const TString&, const NTi::TTypePtr&, ESortOrder)&;
  641. TTableSchema& AddColumn(const TString& name, const NTi::TTypePtr& type) &;
  642. /// @copydoc NYT::TTableSchema::AddColumn(const TString&, const NTi::TTypePtr&, ESortOrder)&;
  643. TTableSchema AddColumn(const TString& name, const NTi::TTypePtr& type) &&;
  644. /// Add optional column of specified type
  645. TTableSchema& AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) &;
  646. /// @copydoc NYT::TTableSchema::AddColumn(const TString&, EValueType, ESortOrder)&;
  647. TTableSchema AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) &&;
  648. /// @copydoc NYT::TTableSchema::AddColumn(const TString&, EValueType, ESortOrder)&;
  649. TTableSchema& AddColumn(const TString& name, EValueType type) &;
  650. /// @copydoc NYT::TTableSchema::AddColumn(const TString&, EValueType, ESortOrder)&;
  651. TTableSchema AddColumn(const TString& name, EValueType type) &&;
  652. ///
  653. /// @brief Make table schema sorted by specified columns
  654. ///
  655. /// Resets old key columns if any
  656. TTableSchema& SortBy(const TSortColumns& columns) &;
  657. /// @copydoc NYT::TTableSchema::SortBy(const TSortColumns&)&;
  658. TTableSchema SortBy(const TSortColumns& columns) &&;
  659. /// Get yson description of table schema
  660. [[nodiscard]] TNode ToNode() const;
  661. /// Parse schema from yson node
  662. static NYT::TTableSchema FromNode(const TNode& node);
  663. friend void Deserialize(TTableSchema& tableSchema, const TNode& node);
  664. };
  665. /// Check for equality of all columns and all schema attributes
  666. bool operator==(const TTableSchema& lhs, const TTableSchema& rhs);
  667. // Pretty printer for unittests
  668. void PrintTo(const TTableSchema& schema, std::ostream* out);
  669. /// Create table schema by protobuf message descriptor
  670. TTableSchema CreateTableSchema(
  671. const ::google::protobuf::Descriptor& messageDescriptor,
  672. const TSortColumns& sortColumns = TSortColumns(),
  673. bool keepFieldsWithoutExtension = true);
  674. /// Create table schema by protobuf message type
  675. template <class TProtoType, typename = std::enable_if_t<std::is_base_of_v<::google::protobuf::Message, TProtoType>>>
  676. inline TTableSchema CreateTableSchema(
  677. const TSortColumns& sortColumns = TSortColumns(),
  678. bool keepFieldsWithoutExtension = true)
  679. {
  680. static_assert(
  681. std::is_base_of_v<::google::protobuf::Message, TProtoType>,
  682. "Template argument must be derived from ::google::protobuf::Message");
  683. return CreateTableSchema(
  684. *TProtoType::descriptor(),
  685. sortColumns,
  686. keepFieldsWithoutExtension);
  687. }
  688. ///
  689. /// @brief Create strict table schema from `struct` type.
  690. ///
  691. /// Names and types of columns are taken from struct member names and types.
  692. /// `Strict` flag is set to true, all other attribute of schema and columns
  693. /// are left with default values
  694. TTableSchema CreateTableSchema(NTi::TTypePtr type);
  695. ////////////////////////////////////////////////////////////////////////////////
  ///
  /// @brief Comparison operation used in a key bound.
  ///
  /// ERelation is one half of @ref NYT::TKeyBound, which can serve as the
  /// lower or upper key limit in @ref TReadLimit.
  ///
  /// `Less` and `LessOrEqual` are meant for upper limits, while
  /// `Greater` and `GreaterOrEqual` are meant for lower limits.
  ///
  /// Using a relation in a limit of the wrong kind is an error.
  ///
  /// @see https://ytsaurus.tech/docs/en/user-guide/storage/ypath#rich_ypath
  enum class ERelation
  {
      ///
      /// @brief Relation "less"
      ///
      /// Selects the keys located before the specified key.
      /// Can only be used in upper limit.
      Less /* "<" */,

      ///
      /// @brief Relation "less or equal"
      ///
      /// Selects the keys located before the specified key, or equal to it.
      /// Can only be used in upper limit.
      LessOrEqual /* "<=" */,

      ///
      /// @brief Relation "greater"
      ///
      /// Selects the keys located after the specified key.
      /// Can only be used in lower limit.
      Greater /* ">" */,

      ///
      /// @brief Relation "greater or equal"
      ///
      /// Selects the keys located after the specified key, or equal to it.
      /// Can only be used in lower limit.
      GreaterOrEqual /* ">=" */,
  };
  735. ///
  736. /// @brief Key with relation specifying interval of keys in lower or upper limit of @ref NYT::TReadRange
  737. ///
  738. /// @see https://ytsaurus.tech/docs/en/user-guide/common/ypath#rich_ypath
  739. struct TKeyBound
  740. {
  741. /// @cond Doxygen_Suppress
  742. using TSelf = TKeyBound;
  743. explicit TKeyBound(ERelation relation = ERelation::Less, TKey key = TKey{});
  744. FLUENT_FIELD_DEFAULT_ENCAPSULATED(ERelation, Relation, ERelation::Less);
  745. FLUENT_FIELD_DEFAULT_ENCAPSULATED(TKey, Key, TKey{});
  746. /// @endcond
  747. };
  748. ///
  749. /// @brief Description of the read limit.
  750. ///
  751. /// It is actually a variant and must store exactly one field.
  752. ///
  753. /// @see https://ytsaurus.tech/docs/en/user-guide/common/ypath#rich_ypath
  754. struct TReadLimit
  755. {
  756. /// @cond Doxygen_Suppress
  757. using TSelf = TReadLimit;
  758. /// @endcond
  759. ///
  760. /// @brief KeyBound specifies table key and whether to include it
  761. ///
  762. /// It can be used in lower or upper limit when reading tables.
  763. FLUENT_FIELD_OPTION(TKeyBound, KeyBound);
  764. ///
  765. /// @brief Table key
  766. ///
  767. /// It can be used in exact, lower or upper limit when reading tables.
  768. FLUENT_FIELD_OPTION(TKey, Key);
  769. ///
  770. /// @brief Row index
  771. ///
  772. /// It can be used in exact, lower or upper limit when reading tables.
  773. FLUENT_FIELD_OPTION(i64, RowIndex);
  774. ///
  775. /// @brief File offset
  776. ///
  777. /// It can be used in lower or upper limit when reading files.
  778. FLUENT_FIELD_OPTION(i64, Offset);
  779. ///
  780. /// @brief Tablet index
  781. ///
  782. /// It can be used in lower or upper limit in dynamic table operations
  783. FLUENT_FIELD_OPTION(i64, TabletIndex);
  784. };
  785. ///
  786. /// @brief Range of a table or a file
  787. ///
  788. /// @see https://ytsaurus.tech/docs/en/user-guide/common/ypath#rich_ypath
  789. struct TReadRange
  790. {
  791. using TSelf = TReadRange;
  792. ///
  793. /// @brief Lower limit of the range
  794. ///
  795. /// It is usually inclusive (except when @ref NYT::TKeyBound with relation @ref NYT::ERelation::Greater is used).
  796. FLUENT_FIELD(TReadLimit, LowerLimit);
  797. ///
  798. /// @brief Lower limit of the range
  799. ///
  800. /// It is usually exclusive (except when @ref NYT::TKeyBound with relation @ref NYT::ERelation::LessOrEqual is used).
  801. FLUENT_FIELD(TReadLimit, UpperLimit);
  802. /// Exact key or row index.
  803. FLUENT_FIELD(TReadLimit, Exact);
  804. /// Create read range from row indexes.
  805. static TReadRange FromRowIndices(i64 lowerLimit, i64 upperLimit)
  806. {
  807. return TReadRange()
  808. .LowerLimit(TReadLimit().RowIndex(lowerLimit))
  809. .UpperLimit(TReadLimit().RowIndex(upperLimit));
  810. }
  811. /// Create read range from keys.
  812. static TReadRange FromKeys(const TKey& lowerKeyInclusive, const TKey& upperKeyExclusive)
  813. {
  814. return TReadRange()
  815. .LowerLimit(TReadLimit().Key(lowerKeyInclusive))
  816. .UpperLimit(TReadLimit().Key(upperKeyExclusive));
  817. }
  818. };
  819. ///
  820. /// @brief Path with additional attributes.
  821. ///
  822. /// Allows to specify additional attributes for path used in some operations.
  823. ///
  824. /// @see https://ytsaurus.tech/docs/en/user-guide/storage/ypath#rich_ypath
  825. struct TRichYPath
  826. {
  827. /// @cond Doxygen_Suppress
  828. using TSelf = TRichYPath;
  829. /// @endcond
  830. /// Path itself.
  831. FLUENT_FIELD(TYPath, Path);
  832. /// Specifies that path should be appended not overwritten
  833. FLUENT_FIELD_OPTION(bool, Append);
  834. /// @deprecated Deprecated attribute.
  835. FLUENT_FIELD_OPTION(bool, PartiallySorted);
  836. /// Specifies that path is expected to be sorted by these columns.
  837. FLUENT_FIELD(TSortColumns, SortedBy);
  838. /// Add range to read.
  839. TRichYPath& AddRange(TReadRange range)
  840. {
  841. if (!Ranges_) {
  842. Ranges_.ConstructInPlace();
  843. }
  844. Ranges_->push_back(std::move(range));
  845. return *this;
  846. }
  847. TRichYPath& ResetRanges()
  848. {
  849. Ranges_.Clear();
  850. return *this;
  851. }
  852. ///
  853. /// @{
  854. ///
  855. /// Return ranges to read.
  856. ///
  857. /// NOTE: Nothing (in TMaybe) and empty TVector are different ranges.
  858. /// Nothing represents universal range (reader reads all table rows).
  859. /// Empty TVector represents empty range (reader returns empty set of rows).
  860. const TMaybe<TVector<TReadRange>>& GetRanges() const
  861. {
  862. return Ranges_;
  863. }
  864. TMaybe<TVector<TReadRange>>& MutableRanges()
  865. {
  866. return Ranges_;
  867. }
  868. ///
  869. /// @{
  870. ///
  871. /// Get range view, that is convenient way to iterate through all ranges.
  872. TArrayRef<TReadRange> MutableRangesView()
  873. {
  874. if (Ranges_.Defined()) {
  875. return TArrayRef(Ranges_->data(), Ranges_->size());
  876. } else {
  877. return {};
  878. }
  879. }
  880. TArrayRef<const TReadRange> GetRangesView() const
  881. {
  882. if (Ranges_.Defined()) {
  883. return TArrayRef(Ranges_->data(), Ranges_->size());
  884. } else {
  885. return {};
  886. }
  887. }
  888. /// @}
  889. /// @{
  890. ///
  891. /// Get range by index.
  892. const TReadRange& GetRange(ssize_t i) const
  893. {
  894. return Ranges_.GetRef()[i];
  895. }
  896. TReadRange& MutableRange(ssize_t i)
  897. {
  898. return Ranges_.GetRef()[i];
  899. }
  900. /// @}
  901. ///
  902. /// @brief Specifies columns that should be read.
  903. ///
  904. /// If it's set to Nothing then all columns will be read.
  905. /// If empty TColumnNames is specified then each read row will be empty.
  906. FLUENT_FIELD_OPTION(TColumnNames, Columns);
  907. FLUENT_FIELD_OPTION(bool, Teleport);
  908. FLUENT_FIELD_OPTION(bool, Primary);
  909. FLUENT_FIELD_OPTION(bool, Foreign);
  910. FLUENT_FIELD_OPTION(i64, RowCountLimit);
  911. FLUENT_FIELD_OPTION(TString, FileName);
  912. /// Specifies original path to be shown in Web UI
  913. FLUENT_FIELD_OPTION(TYPath, OriginalPath);
  914. ///
  915. /// @brief Specifies that this path points to executable file
  916. ///
  917. /// Used in operation specs.
  918. FLUENT_FIELD_OPTION(bool, Executable);
  919. ///
  920. /// @brief Specify format to use when loading table.
  921. ///
  922. /// Used in operation specs.
  923. FLUENT_FIELD_OPTION(TNode, Format);
  924. /// @brief Specifies table schema that will be set on the path
  925. FLUENT_FIELD_OPTION(TTableSchema, Schema);
  926. /// Specifies compression codec that will be set on the path
  927. FLUENT_FIELD_OPTION(TString, CompressionCodec);
  928. /// Specifies erasure codec that will be set on the path
  929. FLUENT_FIELD_OPTION(EErasureCodecAttr, ErasureCodec);
  930. /// Specifies schema modification that will be set on the path
  931. FLUENT_FIELD_OPTION(ESchemaModificationAttr, SchemaModification);
  932. /// Specifies optimize_for attribute that will be set on the path
  933. FLUENT_FIELD_OPTION(EOptimizeForAttr, OptimizeFor);
  934. ///
  935. /// @brief Do not put file used in operation into node cache
  936. ///
  937. /// If BypassArtifactCache == true, file will be loaded into the job's sandbox bypassing the cache on the YT node.
  938. /// It helps jobs that use tmpfs to start faster,
  939. /// because files will be loaded into tmpfs directly bypassing disk cache
  940. FLUENT_FIELD_OPTION(bool, BypassArtifactCache);
  941. ///
  942. /// @brief Timestamp of dynamic table.
  943. ///
  944. /// NOTE: it is _not_ unix timestamp
  945. /// (instead it's transaction timestamp, that is more complex structure).
  946. FLUENT_FIELD_OPTION(i64, Timestamp);
  947. ///
  948. /// @brief Specify transaction that should be used to access this path.
  949. ///
  950. /// Allows to start cross-transactional operations.
  951. FLUENT_FIELD_OPTION(TTransactionId, TransactionId);
  952. using TRenameColumnsDescriptor = THashMap<TString, TString>;
  953. /// Specifies columnar mapping which will be applied to columns before transfer to job.
  954. FLUENT_FIELD_OPTION(TRenameColumnsDescriptor, RenameColumns);
  955. /// Create empty path with no attributes
  956. TRichYPath()
  957. { }
  958. ///
  959. /// @{
  960. ///
  961. /// @brief Create path from string
  962. TRichYPath(const char* path)
  963. : Path_(path)
  964. { }
  965. TRichYPath(const TYPath& path)
  966. : Path_(path)
  967. { }
  968. /// @}
  969. private:
  970. TMaybe<TVector<TReadRange>> Ranges_;
  971. };
  972. ///
  973. /// @ref Create copy of @ref NYT::TRichYPath with schema derived from proto message.
  974. ///
  975. ///
  976. template <typename TProtoType>
  977. TRichYPath WithSchema(const TRichYPath& path, const TSortColumns& sortBy = TSortColumns())
  978. {
  979. static_assert(std::is_base_of_v<::google::protobuf::Message, TProtoType>, "TProtoType must be Protobuf message");
  980. auto schemedPath = path;
  981. if (!schemedPath.Schema_) {
  982. schemedPath.Schema(CreateTableSchema<TProtoType>(sortBy));
  983. }
  984. return schemedPath;
  985. }
  986. ///
  987. /// @brief Create copy of @ref NYT::TRichYPath with schema derived from TRowType if possible.
  988. ///
  989. /// If TRowType is protobuf message schema is derived from it and set to returned path.
  990. /// Otherwise schema of original path is left unchanged (and probably unset).
  991. template <typename TRowType>
  992. TRichYPath MaybeWithSchema(const TRichYPath& path, const TSortColumns& sortBy = TSortColumns())
  993. {
  994. if constexpr (std::is_base_of_v<::google::protobuf::Message, TRowType>) {
  995. return WithSchema<TRowType>(path, sortBy);
  996. } else {
  997. return path;
  998. }
  999. }
  1000. ///
  1001. /// @brief Get the list of ranges related to path in compatibility mode.
  1002. ///
  1003. /// - If path is missing ranges, empty list is returned.
  1004. /// - If path has associated range list and the list is not empty, function returns this list.
  1005. /// - If path has associated range list and this list is empty, exception is thrown.
  1006. ///
  1007. /// Before YT-17683 RichYPath didn't support empty range list and empty range actually meant universal range.
  1008. /// This function emulates this old behavior.
  1009. ///
  1010. /// @see https://st.yandex-team.ru/YT-17683
  1011. const TVector<TReadRange>& GetRangesCompat(const TRichYPath& path);
  1012. ////////////////////////////////////////////////////////////////////////////////
  1013. /// Statistics about table columns.
  1014. struct TTableColumnarStatistics
  1015. {
  1016. /// Total data weight for all chunks for each of requested columns.
  1017. THashMap<TString, i64> ColumnDataWeight;
  1018. /// Total weight of all old chunks that don't keep columnar statistics.
  1019. i64 LegacyChunksDataWeight = 0;
  1020. /// Timestamps total weight (only for dynamic tables).
  1021. TMaybe<i64> TimestampTotalWeight;
  1022. };
  1023. ////////////////////////////////////////////////////////////////////////////////
  1024. /// Description of a partition.
  1025. struct TMultiTablePartition
  1026. {
  1027. struct TStatistics
  1028. {
  1029. i64 ChunkCount = 0;
  1030. i64 DataWeight = 0;
  1031. i64 RowCount = 0;
  1032. };
  1033. /// Ranges of input tables for this partition.
  1034. TVector<TRichYPath> TableRanges;
  1035. /// Aggregate statistics of all the table ranges in the partition.
  1036. TStatistics AggregateStatistics;
  1037. };
  1038. /// Table partitions from GetTablePartitions command.
  1039. struct TMultiTablePartitions
  1040. {
  1041. /// Disjoint partitions into which the input tables were divided.
  1042. TVector<TMultiTablePartition> Partitions;
  1043. };
  1044. ////////////////////////////////////////////////////////////////////////////////
  1045. ///
  1046. /// @brief Contains information about tablet
  1047. ///
  1048. /// @see NYT::IClient::GetTabletInfos
  1049. struct TTabletInfo
  1050. {
  1051. ///
  1052. /// @brief Indicates the total number of rows added to the tablet (including trimmed ones).
  1053. ///
  1054. /// Currently only provided for ordered tablets.
  1055. i64 TotalRowCount = 0;
  1056. ///
  1057. /// @brief Contains the number of front rows that are trimmed and are not guaranteed to be accessible.
  1058. ///
  1059. /// Only makes sense for ordered tablet.
  1060. i64 TrimmedRowCount = 0;
  1061. ///
  1062. /// @brief Tablet cell barrier timestamp, which lags behind the current timestamp
  1063. ///
  1064. /// It is guaranteed that all transactions with commit timestamp not exceeding the barrier are fully committed;
  1065. /// e.g. all their added rows are visible (and are included in @ref NYT::TTabletInfo::TotalRowCount).
  1066. /// Mostly makes sense for ordered tablets.
  1067. ui64 BarrierTimestamp;
  1068. };
  1069. ////////////////////////////////////////////////////////////////////////////////
  1070. /// List of attributes to retrieve in operations like @ref NYT::ICypressClient::Get
  1071. struct TAttributeFilter
  1072. {
  1073. /// @cond Doxygen_Suppress
  1074. using TSelf = TAttributeFilter;
  1075. /// @endcond
  1076. /// List of attributes.
  1077. FLUENT_VECTOR_FIELD(TString, Attribute);
  1078. };
  1079. ////////////////////////////////////////////////////////////////////////////////
  1080. ///
  1081. /// @brief Check if none of the fields of @ref NYT::TReadLimit is set.
  1082. ///
  1083. /// @return true if any field of readLimit is set and false otherwise.
  1084. bool IsTrivial(const TReadLimit& readLimit);
  1085. /// Convert yson node type to table schema type
  1086. EValueType NodeTypeToValueType(TNode::EType nodeType);
  1087. ////////////////////////////////////////////////////////////////////////////////
  1088. ///
  1089. /// @brief Enumeration for specifying how reading from master is performed.
  1090. ///
  1091. /// Used in operations like NYT::ICypressClient::Get
  1092. enum class EMasterReadKind : int
  1093. {
  1094. ///
  1095. /// @brief Reading from leader.
  1096. ///
  1097. /// Should almost never be used since it's expensive and for regular uses has no difference from
  1098. /// "follower" read.
  1099. Leader /* "leader" */,
  1100. /// @brief Reading from master follower (default).
  1101. Follower /* "follower" */,
  1102. Cache /* "cache" */,
  1103. MasterCache /* "master_cache" */,
  1104. };
  1105. ////////////////////////////////////////////////////////////////////////////////
  1106. /// @cond Doxygen_Suppress
  1107. namespace NDetail {
  1108. // MUST NOT BE USED BY CLIENTS
  1109. // TODO: we should use default GENERATE_ENUM_SERIALIZATION
  1110. TString ToString(EValueType type);
  1111. } // namespace NDetail
  1112. /// @endcond
  1113. ////////////////////////////////////////////////////////////////////////////////
  1114. } // namespace NYT