common.h 37 KB


  1. #pragma once
  2. ///
  3. /// @file yt/cpp/mapreduce/interface/common.h
  4. ///
  5. /// Header containing miscellaneous structs and classes used in library.
  6. #include "fwd.h"
  7. #include <library/cpp/type_info/type_info.h>
  8. #include <library/cpp/yson/node/node.h>
  9. #include <util/generic/guid.h>
  10. #include <util/generic/map.h>
  11. #include <util/generic/maybe.h>
  12. #include <util/generic/ptr.h>
  13. #include <util/system/type_name.h>
  14. #include <util/generic/vector.h>
  15. #include <google/protobuf/message.h>
  16. #include <initializer_list>
  17. #include <type_traits>
  18. namespace NYT {
  19. ////////////////////////////////////////////////////////////////////////////////
  20. /// @cond Doxygen_Suppress
  /// Declares a public data member `name##_` of the given type together with
  /// a fluent setter `name(value)`. The setter copy-assigns the value and returns
  /// the enclosing object as `TSelf&`, so calls can be chained.
  /// The enclosing class must provide a `TSelf` alias.
  /// The closing `static_assert(true)` lets the invocation end with a semicolon.
  #define FLUENT_FIELD(type, name) \
      type name##_; \
      TSelf& name(const type& value) \
      { \
          name##_ = value; \
          return static_cast<TSelf&>(*this); \
      } \
      static_assert(true)
  /// Declares a private member `name##_` plus public fluent accessors:
  ///  - `name(value) &`  copy-assigns the value and returns `TSelf&`;
  ///  - `name(value) &&` copy-assigns the value and returns the object by value;
  ///  - `name() const &` returns a const reference to the stored value;
  ///  - `name() &&`      returns the stored value by value.
  /// The rvalue-qualified overloads move out of the expiring object instead of
  /// copying it, the same way FLUENT_OPTIONAL_VECTOR_FIELD_ENCAPSULATED does.
  /// The enclosing class must provide a `TSelf` alias; access is left `public`
  /// after the macro.
  #define FLUENT_FIELD_ENCAPSULATED(type, name) \
  private: \
      type name##_; \
  public: \
      TSelf& name(const type& value) & \
      { \
          name##_ = value; \
          return static_cast<TSelf&>(*this); \
      } \
      TSelf name(const type& value) && \
      { \
          name##_ = value; \
          /* *this is expiring: move it into the result rather than copy. */ \
          return static_cast<TSelf&&>(*this); \
      } \
      const type& name() const & \
      { \
          return name##_; \
      } \
      type name() && \
      { \
          /* A member is never implicitly moved on return, so do it explicitly. */ \
          return std::move(name##_); \
      } \
      static_assert(true)
  /// Declares a public optional member `TMaybe<type> name##_` together with
  /// a fluent setter `name(value)` that stores the value and returns the
  /// enclosing object as `TSelf&`.
  /// The enclosing class must provide a `TSelf` alias.
  #define FLUENT_FIELD_OPTION(type, name) \
      TMaybe<type> name##_; \
      TSelf& name(const type& value) \
      { \
          name##_ = value; \
          return static_cast<TSelf&>(*this); \
      } \
      static_assert(true)
  /// Declares a private optional member `TMaybe<type> name##_` plus public
  /// fluent accessors:
  ///  - `name(value)`  stores the value;
  ///  - `Reset##name()` assigns `Nothing()` to the member;
  ///  - `name()`       reads the stored `TMaybe<type>`.
  /// Modifiers come in `&` (return `TSelf&`) and `&&` (return `TSelf` by value)
  /// flavours; the getter comes in `const &` and `&&` flavours.
  /// The rvalue-qualified overloads move out of the expiring object instead of
  /// copying it, the same way FLUENT_OPTIONAL_VECTOR_FIELD_ENCAPSULATED does.
  /// The enclosing class must provide a `TSelf` alias; access is left `public`
  /// after the macro.
  #define FLUENT_FIELD_OPTION_ENCAPSULATED(type, name) \
  private: \
      TMaybe<type> name##_; \
  public: \
      TSelf& name(const type& value) & \
      { \
          name##_ = value; \
          return static_cast<TSelf&>(*this); \
      } \
      TSelf name(const type& value) && \
      { \
          name##_ = value; \
          /* *this is expiring: move it into the result rather than copy. */ \
          return static_cast<TSelf&&>(*this); \
      } \
      TSelf& Reset##name() & \
      { \
          name##_ = Nothing(); \
          return static_cast<TSelf&>(*this); \
      } \
      TSelf Reset##name() && \
      { \
          name##_ = Nothing(); \
          return static_cast<TSelf&&>(*this); \
      } \
      const TMaybe<type>& name() const& \
      { \
          return name##_; \
      } \
      TMaybe<type> name() && \
      { \
          /* A member is never implicitly moved on return, so do it explicitly. */ \
          return std::move(name##_); \
      } \
      static_assert(true)
  /// Declares a public data member `name##_` initialized with `defaultValue`
  /// together with a fluent setter `name(value)` that copy-assigns the value
  /// and returns the enclosing object as `TSelf&`.
  /// The enclosing class must provide a `TSelf` alias.
  #define FLUENT_FIELD_DEFAULT(type, name, defaultValue) \
      type name##_ = defaultValue; \
      TSelf& name(const type& value) \
      { \
          name##_ = value; \
          return static_cast<TSelf&>(*this); \
      } \
      static_assert(true)
  /// Declares a private member `name##_` initialized with `defaultValue` plus
  /// public fluent accessors:
  ///  - `name(value) &`  copy-assigns the value and returns `TSelf&`;
  ///  - `name(value) &&` copy-assigns the value and returns the object by value;
  ///  - `name() const &` returns a const reference to the stored value;
  ///  - `name() &&`      returns the stored value by value.
  /// The rvalue-qualified overloads move out of the expiring object instead of
  /// copying it, the same way FLUENT_OPTIONAL_VECTOR_FIELD_ENCAPSULATED does.
  /// The enclosing class must provide a `TSelf` alias; access is left `public`
  /// after the macro.
  #define FLUENT_FIELD_DEFAULT_ENCAPSULATED(type, name, defaultValue) \
  private: \
      type name##_ = defaultValue; \
  public: \
      TSelf& name(const type& value) & \
      { \
          name##_ = value; \
          return static_cast<TSelf&>(*this); \
      } \
      TSelf name(const type& value) && \
      { \
          name##_ = value; \
          /* *this is expiring: move it into the result rather than copy. */ \
          return static_cast<TSelf&&>(*this); \
      } \
      const type& name() const & \
      { \
          return name##_; \
      } \
      type name() && \
      { \
          /* A member is never implicitly moved on return, so do it explicitly. */ \
          return std::move(name##_); \
      } \
      static_assert(true)
  /// Declares a public member `TVector<type> name##s_` with two fluent
  /// modifiers, both returning the enclosing object as `TSelf&`:
  ///  - `Add##name(value)` appends a single element;
  ///  - `name##s(values)`  replaces the whole vector (argument is moved in).
  /// The enclosing class must provide a `TSelf` alias.
  #define FLUENT_VECTOR_FIELD(type, name) \
      TVector<type> name##s_; \
      TSelf& Add##name(const type& value) \
      { \
          name##s_.push_back(value); \
          return static_cast<TSelf&>(*this); \
      } \
      TSelf& name##s(TVector<type> values) \
      { \
          name##s_ = std::move(values); \
          return static_cast<TSelf&>(*this); \
      } \
      static_assert(true)
  /// Declares a private member `TMaybe<TVector<type>> name##s_` plus public
  /// fluent accessors:
  ///  - `name##s()`          reads the optional vector (`const &`, mutable `&`
  ///                         and moving `&&` flavours);
  ///  - `Add##name(value)`   appends an element, constructing the vector first
  ///                         if the optional is empty;
  ///  - `name##s(values)`    replaces the vector (argument is moved in);
  ///  - `name##s(TNothing)`  and `Reset##name##s()` assign `Nothing()`.
  /// Every modifier exists in an `&` flavour returning `TSelf&` and an `&&`
  /// flavour returning `TSelf` by value (moved from `*this`).
  /// The enclosing class must provide a `TSelf` alias; access is left `public`
  /// after the macro.
  #define FLUENT_OPTIONAL_VECTOR_FIELD_ENCAPSULATED(type, name) \
  private: \
      TMaybe<TVector<type>> name##s_; \
  public: \
      const TMaybe<TVector<type>>& name##s() const & \
      { \
          return name##s_; \
      } \
      TMaybe<TVector<type>>& name##s() & \
      { \
          return name##s_; \
      } \
      TMaybe<TVector<type>> name##s() && \
      { \
          return std::move(name##s_); \
      } \
      TSelf& Add##name(const type& value) & \
      { \
          if (name##s_.Empty()) { \
              name##s_.ConstructInPlace(); \
          } \
          name##s_->push_back(value); \
          return static_cast<TSelf&>(*this); \
      } \
      TSelf Add##name(const type& value) && \
      { \
          if (name##s_.Empty()) { \
              name##s_.ConstructInPlace(); \
          } \
          name##s_->push_back(value); \
          return static_cast<TSelf&&>(*this); \
      } \
      TSelf& name##s(TVector<type> values) & \
      { \
          name##s_ = std::move(values); \
          return static_cast<TSelf&>(*this); \
      } \
      TSelf name##s(TVector<type> values) && \
      { \
          name##s_ = std::move(values); \
          return static_cast<TSelf&&>(*this); \
      } \
      TSelf& name##s(TNothing) & \
      { \
          name##s_ = Nothing(); \
          return static_cast<TSelf&>(*this); \
      } \
      TSelf name##s(TNothing) && \
      { \
          name##s_ = Nothing(); \
          return static_cast<TSelf&&>(*this); \
      } \
      TSelf& Reset##name##s() & \
      { \
          name##s_ = Nothing(); \
          return static_cast<TSelf&>(*this); \
      } \
      TSelf Reset##name##s() && \
      { \
          name##s_ = Nothing(); \
          return static_cast<TSelf&&>(*this); \
      } \
      static_assert(true)
  /// Declares a private member `TVector<type> name##s_` plus public fluent
  /// accessors:
  ///  - `Add##name(value)` appends a single element;
  ///  - `name##s(value)`   replaces the whole vector (argument is moved in);
  ///  - `name##s()`        reads the vector (`const &` and `&&` flavours).
  /// Modifiers exist in an `&` flavour returning `TSelf&` and an `&&` flavour
  /// returning `TSelf` by value.
  /// The rvalue-qualified overloads move out of the expiring object instead of
  /// copying it, the same way FLUENT_OPTIONAL_VECTOR_FIELD_ENCAPSULATED does.
  /// The enclosing class must provide a `TSelf` alias; access is left `public`
  /// after the macro.
  #define FLUENT_VECTOR_FIELD_ENCAPSULATED(type, name) \
  private: \
      TVector<type> name##s_; \
  public: \
      TSelf& Add##name(const type& value) & \
      { \
          name##s_.push_back(value); \
          return static_cast<TSelf&>(*this); \
      } \
      TSelf Add##name(const type& value) && \
      { \
          name##s_.push_back(value); \
          /* *this is expiring: move it into the result rather than copy. */ \
          return static_cast<TSelf&&>(*this); \
      } \
      TSelf& name##s(TVector<type> value) & \
      { \
          name##s_ = std::move(value); \
          return static_cast<TSelf&>(*this); \
      } \
      TSelf name##s(TVector<type> value) && \
      { \
          name##s_ = std::move(value); \
          return static_cast<TSelf&&>(*this); \
      } \
      const TVector<type>& name##s() const & \
      { \
          return name##s_; \
      } \
      TVector<type> name##s() && \
      { \
          /* A member is never implicitly moved on return, so do it explicitly. */ \
          return std::move(name##s_); \
      } \
      static_assert(true)
  /// Declares a public member `TMap<keytype, valuetype> name##_` together with
  /// a fluent `Add##name(key, value)` that inserts the pair via `emplace`
  /// and returns the enclosing object as `TSelf&`.
  /// The enclosing class must provide a `TSelf` alias.
  #define FLUENT_MAP_FIELD(keytype, valuetype, name) \
      TMap<keytype,valuetype> name##_; \
      TSelf& Add##name(const keytype& key, const valuetype& value) \
      { \
          name##_.emplace(key, value); \
          return static_cast<TSelf&>(*this); \
      } \
      static_assert(true)
  238. /// @endcond
  239. ////////////////////////////////////////////////////////////////////////////////
  240. ///
  241. /// @brief Convenience class that keeps sequence of items.
  242. ///
  243. /// Designed to be used as function parameter.
  244. ///
  245. /// Users of such function can then pass:
  246. /// - single item,
  247. /// - initializer list of items,
  248. /// - vector of items;
  249. /// as argument to this function.
  250. ///
  251. /// Example:
  252. /// ```
  253. /// void Foo(const TOneOrMany<int>& arg);
  254. /// ...
  255. /// Foo(1); // ok
  256. /// Foo({1, 2, 3}); // ok
  257. /// ```
  258. template <class T, class TDerived>
  259. struct TOneOrMany
  260. {
  261. /// @cond Doxygen_Suppress
  262. using TSelf = std::conditional_t<std::is_void_v<TDerived>, TOneOrMany, TDerived>;
  263. /// @endcond
  264. /// Initialize with empty sequence.
  265. TOneOrMany() = default;
  266. // Initialize from initializer list.
  267. template<class U>
  268. TOneOrMany(std::initializer_list<U> il)
  269. {
  270. Parts_.assign(il.begin(), il.end());
  271. }
  272. /// Put arguments to sequence
  273. template <class U, class... TArgs>
  274. requires std::is_convertible_v<U, T>
  275. TOneOrMany(U&& arg, TArgs&&... args)
  276. {
  277. Add(arg, std::forward<TArgs>(args)...);
  278. }
  279. /// Initialize from vector.
  280. TOneOrMany(TVector<T> args)
  281. : Parts_(std::move(args))
  282. { }
  283. /// @brief Order is defined the same way as in TVector
  284. bool operator==(const TOneOrMany& rhs) const
  285. {
  286. // N.B. We would like to make this method to be `= default`,
  287. // but this breaks MSVC compiler for the cases when T doesn't
  288. // support comparison.
  289. return Parts_ == rhs.Parts_;
  290. }
  291. ///
  292. /// @{
  293. ///
  294. /// @brief Add all arguments to sequence
  295. template <class U, class... TArgs>
  296. requires std::is_convertible_v<U, T>
  297. TSelf& Add(U&& part, TArgs&&... args) &
  298. {
  299. Parts_.push_back(std::forward<U>(part));
  300. if constexpr (sizeof...(args) > 0) {
  301. [[maybe_unused]] int dummy[sizeof...(args)] = {(Parts_.push_back(std::forward<TArgs>(args)), 0) ... };
  302. }
  303. return static_cast<TSelf&>(*this);
  304. }
  305. template <class U, class... TArgs>
  306. requires std::is_convertible_v<U, T>
  307. TSelf Add(U&& part, TArgs&&... args) &&
  308. {
  309. return std::move(Add(std::forward<U>(part), std::forward<TArgs>(args)...));
  310. }
  311. /// @}
  312. /// Content of sequence.
  313. TVector<T> Parts_;
  314. };
  315. ////////////////////////////////////////////////////////////////////////////////
  ///
  /// @brief Type of a value that can be stored in a YT table.
  ///
  /// @ref NYT::TTableSchema
  /// https://ytsaurus.tech/docs/en/user-guide/storage/data-types
  enum EValueType : int
  {
      /// Int64: 64-bit signed integer.
      VT_INT64,

      /// Uint64: 64-bit unsigned integer.
      VT_UINT64,

      /// Double: double precision (64-bit) floating point number.
      VT_DOUBLE,

      /// Boolean: `true` or `false`.
      VT_BOOLEAN,

      /// String: arbitrary byte sequence.
      VT_STRING,

      /// Any: arbitrary yson document.
      VT_ANY,

      /// Int8: 8-bit signed integer.
      VT_INT8,

      /// Int16: 16-bit signed integer.
      VT_INT16,

      /// Int32: 32-bit signed integer.
      VT_INT32,

      /// Uint8: 8-bit unsigned integer.
      VT_UINT8,

      /// Uint16: 16-bit unsigned integer.
      VT_UINT16,

      /// Uint32: 32-bit unsigned integer.
      VT_UINT32,

      /// Utf8: byte sequence that is valid utf8.
      VT_UTF8,

      /// Null: absence of value (almost never used in schemas).
      VT_NULL,

      /// Void: absence of value (almost never used in schemas);
      /// the difference between null and void is yql-specific.
      VT_VOID,

      /// Date: number of days since Unix epoch (unsigned).
      VT_DATE,

      /// Datetime: number of seconds since Unix epoch (unsigned).
      VT_DATETIME,

      /// Timestamp: number of milliseconds since Unix epoch (unsigned).
      /// NOTE(review): the YTsaurus data-types page appears to describe
      /// `timestamp` in microseconds - confirm the unit.
      VT_TIMESTAMP,

      /// Interval: difference between two timestamps (signed).
      VT_INTERVAL,

      /// Float: 32-bit floating point number.
      VT_FLOAT,

      /// Json: sequence of bytes that is valid json.
      VT_JSON,
  };
  ///
  /// @brief Sort order of a column.
  ///
  /// @ref NYT::TTableSchema
  enum ESortOrder : int
  {
      /// Values go in ascending order.
      SO_ASCENDING    /* "ascending" */,

      /// Values go in descending order.
      SO_DESCENDING   /* "descending" */,
  };
  377. ///
  378. /// @brief Value of "optimize_for" attribute.
  379. ///
  380. /// @ref NYT::TRichYPath
  381. enum EOptimizeForAttr : i8
  382. {
  383. /// Optimize for scan
  384. OF_SCAN_ATTR /* "scan" */,
  385. /// Optimize for lookup
  386. OF_LOOKUP_ATTR /* "lookup" */,
  387. };
  388. ///
  389. /// @brief Value of "erasure_codec" attribute.
  390. ///
  391. /// @ref NYT::TRichYPath
  392. enum EErasureCodecAttr : i8
  393. {
  394. /// @cond Doxygen_Suppress
  395. EC_NONE_ATTR /* "none" */,
  396. EC_REED_SOLOMON_6_3_ATTR /* "reed_solomon_6_3" */,
  397. EC_LRC_12_2_2_ATTR /* "lrc_12_2_2" */,
  398. EC_ISA_LRC_12_2_2_ATTR /* "isa_lrc_12_2_2" */,
  399. /// @endcond
  400. };
  401. ///
  402. /// @brief Value of "schema_modification" attribute.
  403. ///
  404. /// @ref NYT::TRichYPath
  405. enum ESchemaModificationAttr : i8
  406. {
  407. SM_NONE_ATTR /* "none" */,
  408. SM_UNVERSIONED_UPDATE /* "unversioned_update" */,
  409. };
  410. ////////////////////////////////////////////////////////////////////////////////
  411. ///
  412. /// @brief Table key column description.
  413. ///
  414. /// The description includes column name and sort order.
  415. ///
  416. /// @anchor TSortOrder_backward_compatibility
  417. /// @note
  418. /// Many functions that use `TSortOrder` as argument used to take `TString`
  419. /// (the only allowed sort order was "ascending" and user didn't have to specify it).
  420. /// @note
  421. /// This class is designed to provide backward compatibility for such code and therefore
  422. /// objects of this class can be constructed and assigned from TString-like objects only.
  423. ///
  424. /// @see NYT::TSortOperationSpec
  425. class TSortColumn
  426. {
  427. public:
  428. /// @cond Doxygen_Suppress
  429. using TSelf = TSortColumn;
  430. /// @endcond
  431. /// Column name
  432. FLUENT_FIELD_ENCAPSULATED(TString, Name);
  433. /// Sort order
  434. FLUENT_FIELD_DEFAULT_ENCAPSULATED(ESortOrder, SortOrder, ESortOrder::SO_ASCENDING);
  435. ///
  436. /// @{
  437. ///
  438. /// @brief Construct object from name and sort order
  439. ///
  440. /// Constructors are intentionally implicit so `TSortColumn` can be compatible with old code.
  441. /// @ref TSortOrder_backward_compatibility
  442. TSortColumn(TStringBuf name = {}, ESortOrder sortOrder = ESortOrder::SO_ASCENDING);
  443. TSortColumn(const TString& name, ESortOrder sortOrder = ESortOrder::SO_ASCENDING);
  444. TSortColumn(const char* name, ESortOrder sortOrder = ESortOrder::SO_ASCENDING);
  445. /// @}
  446. /// Check that sort order is ascending, throw exception otherwise.
  447. const TSortColumn& EnsureAscending() const;
  448. /// @brief Convert sort to yson representation as YT API expects it.
  449. TNode ToNode() const;
  450. /// @brief Comparison is default and checks both name and sort order.
  451. bool operator == (const TSortColumn& rhs) const = default;
  452. ///
  453. /// @{
  454. ///
  455. /// @brief Assign object from column name, and set sort order to `ascending`.
  456. ///
  457. /// This is backward compatibility methods.
  458. ///
  459. /// @ref TSortOrder_backward_compatibility
  460. TSortColumn& operator = (TStringBuf name);
  461. TSortColumn& operator = (const TString& name);
  462. TSortColumn& operator = (const char* name);
  463. /// @}
  464. bool operator == (const TStringBuf rhsName) const;
  465. bool operator == (const TString& rhsName) const;
  466. bool operator == (const char* rhsName) const;
  467. // Intentionally implicit conversions.
  468. operator TString() const;
  469. operator TStringBuf() const;
  470. operator std::string() const;
  471. Y_SAVELOAD_DEFINE(Name_, SortOrder_);
  472. };
  473. ///
  474. /// @brief List of @ref TSortColumn
  475. ///
  476. /// Contains a bunch of helper methods such as constructing from single object.
  477. class TSortColumns
  478. : public TOneOrMany<TSortColumn, TSortColumns>
  479. {
  480. public:
  481. using TOneOrMany<TSortColumn, TSortColumns>::TOneOrMany;
  482. /// Construct empty list.
  483. TSortColumns();
  484. ///
  485. /// @{
  486. ///
  487. /// @brief Construct list of ascending sort order columns by their names.
  488. ///
  489. /// Required for backward compatibility.
  490. ///
  491. /// @ref TSortOrder_backward_compatibility
  492. TSortColumns(const TVector<TString>& names);
  493. TSortColumns(const TColumnNames& names);
  494. /// @}
  495. ///
  496. /// @brief Implicit conversion to column list.
  497. ///
  498. /// If all columns has ascending sort order return list of their names.
  499. /// Throw exception otherwise.
  500. ///
  501. /// Required for backward compatibility.
  502. ///
  503. /// @ref TSortOrder_backward_compatibility
  504. operator TColumnNames() const;
  505. /// Make sure that all columns are of ascending sort order.
  506. const TSortColumns& EnsureAscending() const;
  507. /// Get list of column names.
  508. TVector<TString> GetNames() const;
  509. };
  510. ////////////////////////////////////////////////////////////////////////////////
  511. /// Helper function to create new style type from old style one.
  512. NTi::TTypePtr ToTypeV3(EValueType type, bool required);
  513. ///
  514. /// @brief Single column description
  515. ///
  516. /// Each field describing column has setter and getter.
  517. ///
  518. /// Example reading field:
  519. /// ```
  520. /// ... columnSchema.Name() ...
  521. /// ```
  522. ///
  523. /// Example setting field:
  524. /// ```
  525. /// columnSchema.Name("my-column").Type(VT_INT64); // set name and type
  526. /// ```
  527. ///
  528. /// @ref https://ytsaurus.tech/docs/en/user-guide/storage/static-schema
  529. class TColumnSchema
  530. {
  531. public:
  532. /// @cond Doxygen_Suppress
  533. using TSelf = TColumnSchema;
  534. /// @endcond
  535. ///
  536. /// @brief Construct empty column schemas
  537. ///
  538. /// @note
  539. /// Such schema cannot be used in schema as it it doesn't have name.
  540. TColumnSchema();
  541. ///
  542. /// @{
  543. ///
  544. /// @brief Copy and move constructors are default.
  545. TColumnSchema(const TColumnSchema&) = default;
  546. TColumnSchema& operator=(const TColumnSchema&) = default;
  547. /// @}
  548. FLUENT_FIELD_ENCAPSULATED(TString, Name);
  549. ///
  550. /// @brief Functions to work with type in old manner.
  551. ///
  552. /// @deprecated New code is recommended to work with types using @ref NTi::TTypePtr from type_info library.
  553. TColumnSchema& Type(EValueType type) &;
  554. TColumnSchema Type(EValueType type) &&;
  555. EValueType Type() const;
  556. /// @brief Set and get column type.
  557. /// @{
  558. TColumnSchema& Type(const NTi::TTypePtr& type) &;
  559. TColumnSchema Type(const NTi::TTypePtr& type) &&;
  560. TColumnSchema& TypeV3(const NTi::TTypePtr& type) &;
  561. TColumnSchema TypeV3(const NTi::TTypePtr& type) &&;
  562. NTi::TTypePtr TypeV3() const;
  563. /// @}
  564. ///
  565. /// @brief Raw yson representation of column type
  566. /// @deprecated Prefer to use `TypeV3` methods.
  567. FLUENT_FIELD_OPTION_ENCAPSULATED(TNode, RawTypeV3);
  568. /// Column sort order
  569. FLUENT_FIELD_OPTION_ENCAPSULATED(ESortOrder, SortOrder);
  570. ///
  571. /// @brief Lock group name
  572. ///
  573. /// @ref https://ytsaurus.tech/docs/en/user-guide/dynamic-tables/sorted-dynamic-tables#locking-rows
  574. FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Lock);
  575. /// Expression defining column value
  576. FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Expression);
  577. /// Aggregating function name
  578. FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Aggregate);
  579. ///
  580. /// @brief Storage group name
  581. ///
  582. /// @ref https://ytsaurus.tech/docs/en/user-guide/storage/static-schema
  583. FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Group);
  584. ///
  585. /// @brief Column requiredness.
  586. ///
  587. /// Required columns doesn't accept NULL values.
  588. /// Usually if column is required it means that it has Optional<...> type
  589. bool Required() const;
  590. ///
  591. /// @{
  592. ///
  593. /// @brief Set type in old-style manner
  594. TColumnSchema& Type(EValueType type, bool required) &;
  595. TColumnSchema Type(EValueType type, bool required) &&;
  596. /// @}
  597. private:
  598. friend void Deserialize(TColumnSchema& columnSchema, const TNode& node);
  599. NTi::TTypePtr TypeV3_;
  600. bool Required_ = false;
  601. };
  602. /// Equality check checks all fields of column schema.
  603. bool operator==(const TColumnSchema& lhs, const TColumnSchema& rhs);
  604. ///
  605. /// @brief Description of table schema
  606. ///
  607. /// @see https://ytsaurus.tech/docs/en/user-guide/storage/static-schema
  608. class TTableSchema
  609. {
  610. public:
  611. /// @cond Doxygen_Suppress
  612. using TSelf = TTableSchema;
  613. /// @endcond
  614. /// Column schema
  615. FLUENT_VECTOR_FIELD_ENCAPSULATED(TColumnSchema, Column);
  616. ///
  617. /// @brief Strictness of the schema
  618. ///
  619. /// Strict schemas are not allowed to have columns not described in schema.
  620. /// Nonstrict schemas are allowed to have such columns, all such missing columns are assumed to have
  621. FLUENT_FIELD_DEFAULT_ENCAPSULATED(bool, Strict, true);
  622. ///
  623. /// @brief Whether keys are unique
  624. ///
  625. /// This flag can be set only for schemas that have sorted columns.
  626. /// If flag is set table cannot have multiple rows with same key.
  627. FLUENT_FIELD_DEFAULT_ENCAPSULATED(bool, UniqueKeys, false);
  628. /// Get modifiable column list
  629. TVector<TColumnSchema>& MutableColumns();
  630. /// Check if schema has any described column
  631. [[nodiscard]] bool Empty() const;
  632. /// Add column
  633. TTableSchema& AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) &;
  634. /// @copydoc NYT::TTableSchema::AddColumn(const TString&, const NTi::TTypePtr&, ESortOrder)&;
  635. TTableSchema AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) &&;
  636. /// @copydoc NYT::TTableSchema::AddColumn(const TString&, const NTi::TTypePtr&, ESortOrder)&;
  637. TTableSchema& AddColumn(const TString& name, const NTi::TTypePtr& type) &;
  638. /// @copydoc NYT::TTableSchema::AddColumn(const TString&, const NTi::TTypePtr&, ESortOrder)&;
  639. TTableSchema AddColumn(const TString& name, const NTi::TTypePtr& type) &&;
  640. /// Add optional column of specified type
  641. TTableSchema& AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) &;
  642. /// @copydoc NYT::TTableSchema::AddColumn(const TString&, EValueType, ESortOrder)&;
  643. TTableSchema AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) &&;
  644. /// @copydoc NYT::TTableSchema::AddColumn(const TString&, EValueType, ESortOrder)&;
  645. TTableSchema& AddColumn(const TString& name, EValueType type) &;
  646. /// @copydoc NYT::TTableSchema::AddColumn(const TString&, EValueType, ESortOrder)&;
  647. TTableSchema AddColumn(const TString& name, EValueType type) &&;
  648. ///
  649. /// @brief Make table schema sorted by specified columns
  650. ///
  651. /// Resets old key columns if any
  652. TTableSchema& SortBy(const TSortColumns& columns) &;
  653. /// @copydoc NYT::TTableSchema::SortBy(const TSortColumns&)&;
  654. TTableSchema SortBy(const TSortColumns& columns) &&;
  655. /// Get yson description of table schema
  656. [[nodiscard]] TNode ToNode() const;
  657. /// Parse schema from yson node
  658. static NYT::TTableSchema FromNode(const TNode& node);
  659. friend void Deserialize(TTableSchema& tableSchema, const TNode& node);
  660. };
  661. /// Check for equality of all columns and all schema attributes
  662. bool operator==(const TTableSchema& lhs, const TTableSchema& rhs);
  663. // Pretty printer for unittests
  664. void PrintTo(const TTableSchema& schema, std::ostream* out);
  665. /// Create table schema by protobuf message descriptor
  666. TTableSchema CreateTableSchema(
  667. const ::google::protobuf::Descriptor& messageDescriptor,
  668. const TSortColumns& sortColumns = TSortColumns(),
  669. bool keepFieldsWithoutExtension = true);
  670. /// Create table schema by protobuf message type
  671. template <class TProtoType, typename = std::enable_if_t<std::is_base_of_v<::google::protobuf::Message, TProtoType>>>
  672. inline TTableSchema CreateTableSchema(
  673. const TSortColumns& sortColumns = TSortColumns(),
  674. bool keepFieldsWithoutExtension = true)
  675. {
  676. static_assert(
  677. std::is_base_of_v<::google::protobuf::Message, TProtoType>,
  678. "Template argument must be derived from ::google::protobuf::Message");
  679. return CreateTableSchema(
  680. *TProtoType::descriptor(),
  681. sortColumns,
  682. keepFieldsWithoutExtension);
  683. }
  684. ///
  685. /// @brief Create strict table schema from `struct` type.
  686. ///
  687. /// Names and types of columns are taken from struct member names and types.
  688. /// `Strict` flag is set to true, all other attribute of schema and columns
  689. /// are left with default values
  690. TTableSchema CreateTableSchema(NTi::TTypePtr type);
  691. ////////////////////////////////////////////////////////////////////////////////
  ///
  /// @brief Enumeration describing the comparison operation used in a key bound.
  ///
  /// ERelation is a part of @ref NYT::TKeyBound that can be used as
  /// lower or upper key limit in @ref TReadLimit.
  ///
  /// Relations `Less` and `LessOrEqual` are for the upper limit and
  /// relations `Greater` and `GreaterOrEqual` are for the lower limit.
  ///
  /// It is an error to use a relation in a limit of the wrong kind.
  ///
  /// @see https://ytsaurus.tech/docs/en/user-guide/storage/ypath#rich_ypath
  enum class ERelation
  {
      ///
      /// @brief Relation "less"
      ///
      /// Specifies the range of keys that go before the specified key.
      /// Can only be used in upper limit.
      Less            /* "<" */,

      ///
      /// @brief Relation "less or equal"
      ///
      /// Specifies the range of keys that go before or are equal to the specified key.
      /// Can only be used in upper limit.
      LessOrEqual     /* "<=" */,

      ///
      /// @brief Relation "greater"
      ///
      /// Specifies the range of keys that go after the specified key.
      /// Can only be used in lower limit.
      Greater         /* ">" */,

      ///
      /// @brief Relation "greater or equal"
      ///
      /// Specifies the range of keys that go after or are equal to the specified key.
      /// Can only be used in lower limit.
      GreaterOrEqual  /* ">=" */,
  };
  731. ///
  732. /// @brief Key with relation specifying interval of keys in lower or upper limit of @ref NYT::TReadRange
  733. ///
  734. /// @see https://ytsaurus.tech/docs/en/user-guide/common/ypath#rich_ypath
  735. struct TKeyBound
  736. {
  737. /// @cond Doxygen_Suppress
  738. using TSelf = TKeyBound;
  739. explicit TKeyBound(ERelation relation = ERelation::Less, TKey key = TKey{});
  740. FLUENT_FIELD_DEFAULT_ENCAPSULATED(ERelation, Relation, ERelation::Less);
  741. FLUENT_FIELD_DEFAULT_ENCAPSULATED(TKey, Key, TKey{});
  742. /// @endcond
  743. };
  744. ///
  745. /// @brief Description of the read limit.
  746. ///
  747. /// It is actually a variant and must store exactly one field.
  748. ///
  749. /// @see https://ytsaurus.tech/docs/en/user-guide/common/ypath#rich_ypath
  750. struct TReadLimit
  751. {
  752. /// @cond Doxygen_Suppress
  753. using TSelf = TReadLimit;
  754. /// @endcond
  755. ///
  756. /// @brief KeyBound specifies table key and whether to include it
  757. ///
  758. /// It can be used in lower or upper limit when reading tables.
  759. FLUENT_FIELD_OPTION(TKeyBound, KeyBound);
  760. ///
  761. /// @brief Table key
  762. ///
  763. /// It can be used in exact, lower or upper limit when reading tables.
  764. FLUENT_FIELD_OPTION(TKey, Key);
  765. ///
  766. /// @brief Row index
  767. ///
  768. /// It can be used in exact, lower or upper limit when reading tables.
  769. FLUENT_FIELD_OPTION(i64, RowIndex);
  770. ///
  771. /// @brief File offset
  772. ///
  773. /// It can be used in lower or upper limit when reading files.
  774. FLUENT_FIELD_OPTION(i64, Offset);
  775. ///
  776. /// @brief Tablet index
  777. ///
  778. /// It can be used in lower or upper limit in dynamic table operations
  779. FLUENT_FIELD_OPTION(i64, TabletIndex);
  780. };
  781. ///
  782. /// @brief Range of a table or a file
  783. ///
  784. /// @see https://ytsaurus.tech/docs/en/user-guide/common/ypath#rich_ypath
  785. struct TReadRange
  786. {
  787. using TSelf = TReadRange;
  788. ///
  789. /// @brief Lower limit of the range
  790. ///
  791. /// It is usually inclusive (except when @ref NYT::TKeyBound with relation @ref NYT::ERelation::Greater is used).
  792. FLUENT_FIELD(TReadLimit, LowerLimit);
  793. ///
  794. /// @brief Lower limit of the range
  795. ///
  796. /// It is usually exclusive (except when @ref NYT::TKeyBound with relation @ref NYT::ERelation::LessOrEqual is used).
  797. FLUENT_FIELD(TReadLimit, UpperLimit);
  798. /// Exact key or row index.
  799. FLUENT_FIELD(TReadLimit, Exact);
  800. /// Create read range from row indexes.
  801. static TReadRange FromRowIndices(i64 lowerLimit, i64 upperLimit)
  802. {
  803. return TReadRange()
  804. .LowerLimit(TReadLimit().RowIndex(lowerLimit))
  805. .UpperLimit(TReadLimit().RowIndex(upperLimit));
  806. }
  807. /// Create read range from keys.
  808. static TReadRange FromKeys(const TKey& lowerKeyInclusive, const TKey& upperKeyExclusive)
  809. {
  810. return TReadRange()
  811. .LowerLimit(TReadLimit().Key(lowerKeyInclusive))
  812. .UpperLimit(TReadLimit().Key(upperKeyExclusive));
  813. }
  814. };
  815. ///
  816. /// @brief Path with additional attributes.
  817. ///
  818. /// Allows to specify additional attributes for path used in some operations.
  819. ///
  820. /// @see https://ytsaurus.tech/docs/en/user-guide/storage/ypath#rich_ypath
  821. struct TRichYPath
  822. {
  823. /// @cond Doxygen_Suppress
  824. using TSelf = TRichYPath;
  825. /// @endcond
  826. /// Path itself.
  827. FLUENT_FIELD(TYPath, Path);
  828. /// Specifies that path should be appended not overwritten
  829. FLUENT_FIELD_OPTION(bool, Append);
  830. /// @deprecated Deprecated attribute.
  831. FLUENT_FIELD_OPTION(bool, PartiallySorted);
  832. /// Specifies that path is expected to be sorted by these columns.
  833. FLUENT_FIELD(TSortColumns, SortedBy);
  834. /// Add range to read.
  835. TRichYPath& AddRange(TReadRange range)
  836. {
  837. if (!Ranges_) {
  838. Ranges_.ConstructInPlace();
  839. }
  840. Ranges_->push_back(std::move(range));
  841. return *this;
  842. }
  843. TRichYPath& ResetRanges()
  844. {
  845. Ranges_.Clear();
  846. return *this;
  847. }
  848. ///
  849. /// @{
  850. ///
  851. /// Return ranges to read.
  852. ///
  853. /// NOTE: Nothing (in TMaybe) and empty TVector are different ranges.
  854. /// Nothing represents universal range (reader reads all table rows).
  855. /// Empty TVector represents empty range (reader returns empty set of rows).
  856. const TMaybe<TVector<TReadRange>>& GetRanges() const
  857. {
  858. return Ranges_;
  859. }
  860. TMaybe<TVector<TReadRange>>& MutableRanges()
  861. {
  862. return Ranges_;
  863. }
  864. ///
  865. /// @{
  866. ///
  867. /// Get range view, that is convenient way to iterate through all ranges.
  868. TArrayRef<TReadRange> MutableRangesView()
  869. {
  870. if (Ranges_.Defined()) {
  871. return TArrayRef(Ranges_->data(), Ranges_->size());
  872. } else {
  873. return {};
  874. }
  875. }
  876. TArrayRef<const TReadRange> GetRangesView() const
  877. {
  878. if (Ranges_.Defined()) {
  879. return TArrayRef(Ranges_->data(), Ranges_->size());
  880. } else {
  881. return {};
  882. }
  883. }
  884. /// @}
  885. /// @{
  886. ///
  887. /// Get range by index.
  888. const TReadRange& GetRange(ssize_t i) const
  889. {
  890. return Ranges_.GetRef()[i];
  891. }
  892. TReadRange& MutableRange(ssize_t i)
  893. {
  894. return Ranges_.GetRef()[i];
  895. }
  896. /// @}
  897. ///
  898. /// @brief Specifies columns that should be read.
  899. ///
  900. /// If it's set to Nothing then all columns will be read.
  901. /// If empty TColumnNames is specified then each read row will be empty.
  902. FLUENT_FIELD_OPTION(TColumnNames, Columns);
  903. FLUENT_FIELD_OPTION(bool, Teleport);
  904. FLUENT_FIELD_OPTION(bool, Primary);
  905. FLUENT_FIELD_OPTION(bool, Foreign);
  906. FLUENT_FIELD_OPTION(i64, RowCountLimit);
  907. FLUENT_FIELD_OPTION(TString, FileName);
  908. /// Specifies original path to be shown in Web UI
  909. FLUENT_FIELD_OPTION(TYPath, OriginalPath);
  910. ///
  911. /// @brief Specifies that this path points to executable file
  912. ///
  913. /// Used in operation specs.
  914. FLUENT_FIELD_OPTION(bool, Executable);
  915. ///
  916. /// @brief Specify format to use when loading table.
  917. ///
  918. /// Used in operation specs.
  919. FLUENT_FIELD_OPTION(TNode, Format);
  920. /// @brief Specifies table schema that will be set on the path
  921. FLUENT_FIELD_OPTION(TTableSchema, Schema);
  922. /// Specifies compression codec that will be set on the path
  923. FLUENT_FIELD_OPTION(TString, CompressionCodec);
  924. /// Specifies erasure codec that will be set on the path
  925. FLUENT_FIELD_OPTION(EErasureCodecAttr, ErasureCodec);
  926. /// Specifies schema modification that will be set on the path
  927. FLUENT_FIELD_OPTION(ESchemaModificationAttr, SchemaModification);
  928. /// Specifies optimize_for attribute that will be set on the path
  929. FLUENT_FIELD_OPTION(EOptimizeForAttr, OptimizeFor);
  930. ///
  931. /// @brief Do not put file used in operation into node cache
  932. ///
  933. /// If BypassArtifactCache == true, file will be loaded into the job's sandbox bypassing the cache on the YT node.
  934. /// It helps jobs that use tmpfs to start faster,
  935. /// because files will be loaded into tmpfs directly bypassing disk cache
  936. FLUENT_FIELD_OPTION(bool, BypassArtifactCache);
  937. ///
  938. /// @brief Timestamp of dynamic table.
  939. ///
  940. /// NOTE: it is _not_ unix timestamp
  941. /// (instead it's transaction timestamp, that is more complex structure).
  942. FLUENT_FIELD_OPTION(i64, Timestamp);
  943. ///
  944. /// @brief Specify transaction that should be used to access this path.
  945. ///
  946. /// Allows starting cross-transactional operations.
  947. FLUENT_FIELD_OPTION(TTransactionId, TransactionId);
  948. using TRenameColumnsDescriptor = THashMap<TString, TString>;
  949. /// Specifies columnar mapping which will be applied to columns before transfer to job.
  950. FLUENT_FIELD_OPTION(TRenameColumnsDescriptor, RenameColumns);
  951. /// Create empty path with no attributes
  952. TRichYPath()
  // No initializer list and an empty body: every member keeps its default-constructed state.
  953. { }
  954. ///
  955. /// @{
  956. ///
  957. /// @brief Create path from string
  958. TRichYPath(const char* path)
  // Not marked `explicit`, so a C string converts implicitly to TRichYPath; only Path_ is set here.
  959. : Path_(path)
  960. { }
  961. TRichYPath(const TYPath& path)
  // Not marked `explicit`, so a TYPath converts implicitly to TRichYPath; only Path_ is set here.
  962. : Path_(path)
  963. { }
  964. /// @}
  965. private:
  966. TMaybe<TVector<TReadRange>> Ranges_;
  967. };
  968. ///
  969. /// @brief Create copy of @ref NYT::TRichYPath with schema derived from proto message.
  970. ///
  971. ///
  972. template <typename TProtoType>
  973. TRichYPath WithSchema(const TRichYPath& path, const TSortColumns& sortBy = TSortColumns())
  974. {
  975. static_assert(std::is_base_of_v<::google::protobuf::Message, TProtoType>, "TProtoType must be Protobuf message");
  976. auto schemedPath = path;
  977. if (!schemedPath.Schema_) {
  978. schemedPath.Schema(CreateTableSchema<TProtoType>(sortBy));
  979. }
  980. return schemedPath;
  981. }
  982. ///
  983. /// @brief Create copy of @ref NYT::TRichYPath with schema derived from TRowType if possible.
  984. ///
  985. /// If TRowType is protobuf message schema is derived from it and set to returned path.
  986. /// Otherwise schema of original path is left unchanged (and probably unset).
  987. template <typename TRowType>
  988. TRichYPath MaybeWithSchema(const TRichYPath& path, const TSortColumns& sortBy = TSortColumns())
  989. {
  990. if constexpr (std::is_base_of_v<::google::protobuf::Message, TRowType>) {
  991. return WithSchema<TRowType>(path, sortBy);
  992. } else {
  993. return path;
  994. }
  995. }
  996. ///
  997. /// @brief Get the list of ranges related to path in compatibility mode.
  998. ///
  999. /// - If path is missing ranges, empty list is returned.
  1000. /// - If path has associated range list and the list is not empty, function returns this list.
  1001. /// - If path has associated range list and this list is empty, exception is thrown.
  1002. ///
  1003. /// Before YT-17683 RichYPath didn't support empty range list and empty range actually meant universal range.
  1004. /// This function emulates this old behavior.
  1005. ///
  1006. /// @see https://st.yandex-team.ru/YT-17683
  1007. const TVector<TReadRange>& GetRangesCompat(const TRichYPath& path);
  1008. ////////////////////////////////////////////////////////////////////////////////
  1009. /// Statistics about table columns.
  1010. struct TTableColumnarStatistics
  1011. {
  1012. /// Total data weight for all chunks for each of requested columns.
  // Map from a TString key to an i64 weight. NOTE(review): the key is presumably the column name -- confirm.
  1013. THashMap<TString, i64> ColumnDataWeight;
  1014. /// Total weight of all old chunks that don't keep columnar statistics.
  1015. i64 LegacyChunksDataWeight = 0;
  1016. /// Timestamps total weight (only for dynamic tables).
  // Optional value with no initializer, unlike the zero-initialized weight above.
  1017. TMaybe<i64> TimestampTotalWeight;
  1018. };
  1019. ////////////////////////////////////////////////////////////////////////////////
  1020. /// Description of a partition.
  1021. struct TMultiTablePartition
  1022. {
  /// Plain counters used for AggregateStatistics below; every counter defaults to zero.
  1023. struct TStatistics
  1024. {
  1025. i64 ChunkCount = 0;
  1026. i64 DataWeight = 0;
  1027. i64 RowCount = 0;
  1028. };
  1029. /// Ranges of input tables for this partition.
  1030. TVector<TRichYPath> TableRanges;
  1031. /// Aggregate statistics of all the table ranges in the partition.
  1032. TStatistics AggregateStatistics;
  1033. };
  1034. /// Table partitions from GetTablePartitions command.
  1035. struct TMultiTablePartitions
  1036. {
  1037. /// Disjoint partitions into which the input tables were divided.
  // Each element is a TMultiTablePartition: its table ranges plus their aggregate statistics.
  1038. TVector<TMultiTablePartition> Partitions;
  1039. };
  1040. ////////////////////////////////////////////////////////////////////////////////
  1041. ///
  1042. /// @brief Contains information about tablet
  1043. ///
  1044. /// @see NYT::IClient::GetTabletInfos
  1045. struct TTabletInfo
  1046. {
  1047. ///
  1048. /// @brief Indicates the total number of rows added to the tablet (including trimmed ones).
  1049. ///
  1050. /// Currently only provided for ordered tablets.
  1051. i64 TotalRowCount = 0;
  1052. ///
  1053. /// @brief Contains the number of front rows that are trimmed and are not guaranteed to be accessible.
  1054. ///
  1055. /// Only makes sense for ordered tablet.
  1056. i64 TrimmedRowCount = 0;
  1057. ///
  1058. /// @brief Tablet cell barrier timestamp, which lags behind the current timestamp
  1059. ///
  1060. /// It is guaranteed that all transactions with commit timestamp not exceeding the barrier are fully committed;
  1061. /// e.g. all their added rows are visible (and are included in @ref NYT::TTabletInfo::TotalRowCount).
  1062. /// Mostly makes sense for ordered tablets.
  1063. ui64 BarrierTimestamp;
  1064. };
  1065. ////////////////////////////////////////////////////////////////////////////////
  1066. /// List of attributes to retrieve in operations like @ref NYT::ICypressClient::Get
  1067. struct TAttributeFilter
  1068. {
  1069. /// @cond Doxygen_Suppress
  // The fluent-field macros (see FLUENT_FIELD) return TSelf&, hence this alias.
  1070. using TSelf = TAttributeFilter;
  1071. /// @endcond
  1072. /// List of attributes.
  // NOTE(review): FLUENT_VECTOR_FIELD is defined outside this view; presumably it declares
  // a vector-of-TString member plus a fluent adder -- confirm against the macro.
  1073. FLUENT_VECTOR_FIELD(TString, Attribute);
  1074. };
  1075. ////////////////////////////////////////////////////////////////////////////////
  1076. ///
  1077. /// @brief Check if none of the fields of @ref NYT::TReadLimit is set.
  1078. ///
  1079. /// @return true if no field of readLimit is set and false otherwise.
  1080. bool IsTrivial(const TReadLimit& readLimit);
  1081. /// Convert yson node type to table schema type
  1082. EValueType NodeTypeToValueType(TNode::EType nodeType);
  1083. ////////////////////////////////////////////////////////////////////////////////
  1084. ///
  1085. /// @brief Enumeration for specifying how reading from master is performed.
  1086. ///
  1087. /// Used in operations like NYT::ICypressClient::Get
  // NOTE(review): the /* "name" */ comments after each enumerator look like serialized names
  // consumed by enum-serialization tooling -- keep them byte-for-byte; confirm before editing.
  1088. enum class EMasterReadKind : int
  1089. {
  1090. ///
  1091. /// @brief Reading from leader.
  1092. ///
  1093. /// Should almost never be used since it's expensive and for regular uses has no difference from
  1094. /// "follower" read.
  1095. Leader /* "leader" */,
  1096. /// @brief Reading from master follower (default).
  1097. Follower /* "follower" */,
  // NOTE(review): undocumented here; presumably reads go through a cache -- confirm exact semantics.
  1098. Cache /* "cache" */,
  // NOTE(review): undocumented here; presumably reads go through the master cache -- confirm exact semantics.
  1099. MasterCache /* "master_cache" */,
  1100. };
  1101. ////////////////////////////////////////////////////////////////////////////////
  1102. /// @cond Doxygen_Suppress
  1103. namespace NDetail {
  1104. // MUST NOT BE USED BY CLIENTS
  1105. // TODO: we should use default GENERATE_ENUM_SERIALIZATION
  // Declaration only: maps an EValueType to a TString; the definition is not part of this header chunk.
  1106. TString ToString(EValueType type);
  1107. } // namespace NDetail
  1108. /// @endcond
  1109. ////////////////////////////////////////////////////////////////////////////////
  1110. } // namespace NYT