common.h 37 KB


  1. #pragma once
  2. ///
  3. /// @file yt/cpp/mapreduce/interface/common.h
  4. ///
  5. /// Header containing miscellaneous structs and classes used in library.
  6. #include "fwd.h"
  7. #include <library/cpp/type_info/type_info.h>
  8. #include <library/cpp/yson/node/node.h>
  9. #include <util/generic/guid.h>
  10. #include <util/generic/map.h>
  11. #include <util/generic/maybe.h>
  12. #include <util/generic/ptr.h>
  13. #include <util/system/type_name.h>
  14. #include <util/generic/vector.h>
  15. #include <google/protobuf/message.h>
  16. #include <initializer_list>
  17. #include <type_traits>
  18. namespace NYT {
  19. ////////////////////////////////////////////////////////////////////////////////
  20. /// @cond Doxygen_Suppress
  21. #define FLUENT_FIELD(type, name) \
  22. type name##_; \
  23. TSelf& name(const type& value) \
  24. { \
  25. name##_ = value; \
  26. return static_cast<TSelf&>(*this); \
  27. } \
  28. static_assert(true)
  29. #define FLUENT_FIELD_ENCAPSULATED(type, name) \
  30. private: \
  31. type name##_; \
  32. public: \
  33. TSelf& name(const type& value) & \
  34. { \
  35. name##_ = value; \
  36. return static_cast<TSelf&>(*this); \
  37. } \
  38. TSelf name(const type& value) && \
  39. { \
  40. name##_ = value; \
  41. return static_cast<TSelf&>(*this); \
  42. } \
  43. const type& name() const & \
  44. { \
  45. return name##_; \
  46. } \
  47. type name() && \
  48. { \
  49. return name##_; \
  50. } \
  51. static_assert(true)
  52. #define FLUENT_FIELD_OPTION(type, name) \
  53. TMaybe<type> name##_; \
  54. TSelf& name(const type& value) \
  55. { \
  56. name##_ = value; \
  57. return static_cast<TSelf&>(*this); \
  58. } \
  59. static_assert(true)
  60. #define FLUENT_FIELD_OPTION_ENCAPSULATED(type, name) \
  61. private: \
  62. TMaybe<type> name##_; \
  63. public: \
  64. TSelf& name(const type& value) & \
  65. { \
  66. name##_ = value; \
  67. return static_cast<TSelf&>(*this); \
  68. } \
  69. TSelf name(const type& value) && \
  70. { \
  71. name##_ = value; \
  72. return static_cast<TSelf&>(*this); \
  73. } \
  74. TSelf& Reset##name() & \
  75. { \
  76. name##_ = Nothing(); \
  77. return static_cast<TSelf&>(*this); \
  78. } \
  79. TSelf Reset##name() && \
  80. { \
  81. name##_ = Nothing(); \
  82. return static_cast<TSelf&>(*this); \
  83. } \
  84. const TMaybe<type>& name() const& \
  85. { \
  86. return name##_; \
  87. } \
  88. TMaybe<type> name() && \
  89. { \
  90. return name##_; \
  91. } \
  92. static_assert(true)
  93. #define FLUENT_FIELD_DEFAULT(type, name, defaultValue) \
  94. type name##_ = defaultValue; \
  95. TSelf& name(const type& value) \
  96. { \
  97. name##_ = value; \
  98. return static_cast<TSelf&>(*this); \
  99. } \
  100. static_assert(true)
  101. #define FLUENT_FIELD_DEFAULT_ENCAPSULATED(type, name, defaultValue) \
  102. private: \
  103. type name##_ = defaultValue; \
  104. public: \
  105. TSelf& name(const type& value) & \
  106. { \
  107. name##_ = value; \
  108. return static_cast<TSelf&>(*this); \
  109. } \
  110. TSelf name(const type& value) && \
  111. { \
  112. name##_ = value; \
  113. return static_cast<TSelf&>(*this); \
  114. } \
  115. const type& name() const & \
  116. { \
  117. return name##_; \
  118. } \
  119. type name() && \
  120. { \
  121. return name##_; \
  122. } \
  123. static_assert(true)
  124. #define FLUENT_VECTOR_FIELD(type, name) \
  125. TVector<type> name##s_; \
  126. TSelf& Add##name(const type& value) \
  127. { \
  128. name##s_.push_back(value); \
  129. return static_cast<TSelf&>(*this);\
  130. } \
  131. TSelf& name##s(TVector<type> values) \
  132. { \
  133. name##s_ = std::move(values); \
  134. return static_cast<TSelf&>(*this);\
  135. } \
  136. static_assert(true)
  137. #define FLUENT_OPTIONAL_VECTOR_FIELD_ENCAPSULATED(type, name) \
  138. private: \
  139. TMaybe<TVector<type>> name##s_; \
  140. public: \
  141. const TMaybe<TVector<type>>& name##s() const & { \
  142. return name##s_; \
  143. } \
  144. TMaybe<TVector<type>>& name##s() & { \
  145. return name##s_; \
  146. } \
  147. TMaybe<TVector<type>> name##s() && { \
  148. return std::move(name##s_); \
  149. } \
  150. TSelf& Add##name(const type& value) & \
  151. { \
  152. if (name##s_.Empty()) { \
  153. name##s_.ConstructInPlace(); \
  154. } \
  155. name##s_->push_back(value); \
  156. return static_cast<TSelf&>(*this);\
  157. } \
  158. TSelf Add##name(const type& value) && \
  159. { \
  160. if (name##s_.Empty()) { \
  161. name##s_.ConstructInPlace(); \
  162. } \
  163. name##s_->push_back(value); \
  164. return static_cast<TSelf&&>(*this);\
  165. } \
  166. TSelf& name##s(TVector<type> values) & \
  167. { \
  168. name##s_ = std::move(values); \
  169. return static_cast<TSelf&>(*this);\
  170. } \
  171. TSelf name##s(TVector<type> values) && \
  172. { \
  173. name##s_ = std::move(values); \
  174. return static_cast<TSelf&&>(*this);\
  175. } \
  176. TSelf& name##s(TNothing) & \
  177. { \
  178. name##s_ = Nothing(); \
  179. return static_cast<TSelf&>(*this);\
  180. } \
  181. TSelf name##s(TNothing) && \
  182. { \
  183. name##s_ = Nothing(); \
  184. return static_cast<TSelf&&>(*this);\
  185. } \
  186. TSelf& Reset##name##s() & \
  187. { \
  188. name##s_ = Nothing(); \
  189. return static_cast<TSelf&>(*this);\
  190. } \
  191. TSelf Reset##name##s() && \
  192. { \
  193. name##s_ = Nothing(); \
  194. return static_cast<TSelf&&>(*this);\
  195. } \
  196. static_assert(true)
  197. #define FLUENT_VECTOR_FIELD_ENCAPSULATED(type, name) \
  198. private: \
  199. TVector<type> name##s_; \
  200. public: \
  201. TSelf& Add##name(const type& value) & \
  202. { \
  203. name##s_.push_back(value); \
  204. return static_cast<TSelf&>(*this);\
  205. } \
  206. TSelf Add##name(const type& value) && \
  207. { \
  208. name##s_.push_back(value); \
  209. return static_cast<TSelf&>(*this);\
  210. } \
  211. TSelf& name##s(TVector<type> value) & \
  212. { \
  213. name##s_ = std::move(value); \
  214. return static_cast<TSelf&>(*this);\
  215. } \
  216. TSelf name##s(TVector<type> value) && \
  217. { \
  218. name##s_ = std::move(value); \
  219. return static_cast<TSelf&>(*this);\
  220. } \
  221. const TVector<type>& name##s() const & \
  222. { \
  223. return name##s_; \
  224. } \
  225. TVector<type> name##s() && \
  226. { \
  227. return name##s_; \
  228. } \
  229. static_assert(true)
  230. #define FLUENT_MAP_FIELD(keytype, valuetype, name) \
  231. TMap<keytype,valuetype> name##_; \
  232. TSelf& Add##name(const keytype& key, const valuetype& value) \
  233. { \
  234. name##_.emplace(key, value); \
  235. return static_cast<TSelf&>(*this);\
  236. } \
  237. static_assert(true)
  238. /// @endcond
  239. ////////////////////////////////////////////////////////////////////////////////
  240. ///
  241. /// @brief Convenience class that keeps sequence of items.
  242. ///
  243. /// Designed to be used as function parameter.
  244. ///
  245. /// Users of such function can then pass:
  246. /// - single item,
  247. /// - initializer list of items,
  248. /// - vector of items;
  249. /// as argument to this function.
  250. ///
  251. /// Example:
  252. /// ```
  253. /// void Foo(const TOneOrMany<int>& arg);
  254. /// ...
  255. /// Foo(1); // ok
  256. /// Foo({1, 2, 3}); // ok
  257. /// ```
  258. template <class T, class TDerived>
  259. struct TOneOrMany
  260. {
  261. /// @cond Doxygen_Suppress
  262. using TSelf = std::conditional_t<std::is_void_v<TDerived>, TOneOrMany, TDerived>;
  263. /// @endcond
  264. /// Initialize with empty sequence.
  265. TOneOrMany() = default;
  266. // Initialize from initializer list.
  267. template<class U>
  268. TOneOrMany(std::initializer_list<U> il)
  269. {
  270. Parts_.assign(il.begin(), il.end());
  271. }
  272. /// Put arguments to sequence
  273. template <class U, class... TArgs>
  274. requires std::is_convertible_v<U, T>
  275. TOneOrMany(U&& arg, TArgs&&... args)
  276. {
  277. Add(arg, std::forward<TArgs>(args)...);
  278. }
  279. /// Initialize from vector.
  280. TOneOrMany(TVector<T> args)
  281. : Parts_(std::move(args))
  282. { }
  283. /// @brief Order is defined the same way as in TVector
  284. bool operator==(const TOneOrMany& rhs) const
  285. {
  286. // N.B. We would like to make this method to be `= default`,
  287. // but this breaks MSVC compiler for the cases when T doesn't
  288. // support comparison.
  289. return Parts_ == rhs.Parts_;
  290. }
  291. ///
  292. /// @{
  293. ///
  294. /// @brief Add all arguments to sequence
  295. template <class U, class... TArgs>
  296. requires std::is_convertible_v<U, T>
  297. TSelf& Add(U&& part, TArgs&&... args) &
  298. {
  299. Parts_.push_back(std::forward<U>(part));
  300. if constexpr (sizeof...(args) > 0) {
  301. [[maybe_unused]] int dummy[sizeof...(args)] = {(Parts_.push_back(std::forward<TArgs>(args)), 0) ... };
  302. }
  303. return static_cast<TSelf&>(*this);
  304. }
  305. template <class U, class... TArgs>
  306. requires std::is_convertible_v<U, T>
  307. TSelf Add(U&& part, TArgs&&... args) &&
  308. {
  309. return std::move(Add(std::forward<U>(part), std::forward<TArgs>(args)...));
  310. }
  311. /// @}
  312. /// Content of sequence.
  313. TVector<T> Parts_;
  314. };
  315. ////////////////////////////////////////////////////////////////////////////////
///
/// @brief Type of the value that can occur in YT table.
///
/// Enumerators have no explicit values, so their numeric values follow declaration order;
/// new enumerators should be appended at the end.
///
/// @ref NYT::TTableSchema
/// https://ytsaurus.tech/docs/en/user-guide/storage/data-types
enum EValueType : int
{
    /// Int64, signed integer of 64 bits.
    VT_INT64,

    /// Uint64, unsigned integer of 64 bits.
    VT_UINT64,

    /// Double, floating point number of double precision (64 bits).
    VT_DOUBLE,

    /// Boolean, `true` or `false`.
    VT_BOOLEAN,

    /// String, arbitrary byte sequence.
    VT_STRING,

    /// Any, arbitrary yson document.
    VT_ANY,

    /// Int8, signed integer of 8 bits.
    VT_INT8,

    /// Int16, signed integer of 16 bits.
    VT_INT16,

    /// Int32, signed integer of 32 bits.
    VT_INT32,

    /// Uint8, unsigned integer of 8 bits.
    VT_UINT8,

    /// Uint16, unsigned integer of 16 bits.
    VT_UINT16,

    /// Uint32, unsigned integer of 32 bits.
    VT_UINT32,

    /// Utf8, byte sequence that is valid utf8.
    VT_UTF8,

    /// Null, absence of value (almost never used in schemas)
    VT_NULL,

    /// Void, absence of value (almost never used in schemas);
    /// the difference between null and void is yql-specific.
    VT_VOID,

    /// Date, number of days since Unix epoch (unsigned)
    VT_DATE,

    /// Datetime, number of seconds since Unix epoch (unsigned)
    VT_DATETIME,

    /// Timestamp, number of milliseconds since Unix epoch (unsigned)
    /// NOTE(review): YT data types documentation appears to describe `timestamp` in microseconds — confirm the unit.
    VT_TIMESTAMP,

    /// Interval, difference between two timestamps (signed)
    VT_INTERVAL,

    /// Float, floating point number (32 bits)
    VT_FLOAT,

    /// Json, sequence of bytes that is valid json.
    VT_JSON,

    /// Date32, number of days shifted from Unix epoch, which is 0 (signed)
    VT_DATE32,

    /// Datetime64, number of seconds shifted from Unix epoch, which is 0 (signed)
    VT_DATETIME64,

    /// Timestamp64, number of milliseconds shifted from Unix epoch, which is 0 (signed)
    /// NOTE(review): same unit question as for VT_TIMESTAMP — confirm.
    VT_TIMESTAMP64,

    /// Interval64, difference between two timestamps64 (signed)
    VT_INTERVAL64,

    /// Universally unique identifier according to RFC-4122.
    VT_UUID,
};
///
/// @brief Sort order.
///
/// The quoted comment next to each enumerator gives its textual name;
/// presumably it is consumed by enum serialization tooling, so keep it in sync (TODO confirm).
///
/// @ref NYT::TTableSchema
enum ESortOrder : int
{
    /// Ascending sort order.
    SO_ASCENDING    /* "ascending" */,

    /// Descending sort order.
    SO_DESCENDING   /* "descending" */,
};
///
/// @brief Value of "optimize_for" attribute.
///
/// The quoted comment next to each enumerator gives the attribute value as text.
///
/// @ref NYT::TRichYPath
enum EOptimizeForAttr : i8
{
    /// Optimize for scan
    OF_SCAN_ATTR    /* "scan" */,

    /// Optimize for lookup
    OF_LOOKUP_ATTR  /* "lookup" */,
};
///
/// @brief Value of "erasure_codec" attribute.
///
/// The quoted comment next to each enumerator gives the attribute value as text.
///
/// @ref NYT::TRichYPath
enum EErasureCodecAttr : i8
{
    /// @cond Doxygen_Suppress
    EC_NONE_ATTR                /* "none" */,
    EC_REED_SOLOMON_6_3_ATTR    /* "reed_solomon_6_3" */,
    EC_LRC_12_2_2_ATTR          /* "lrc_12_2_2" */,
    EC_ISA_LRC_12_2_2_ATTR      /* "isa_lrc_12_2_2" */,
    /// @endcond
};
///
/// @brief Value of "schema_modification" attribute.
///
/// The quoted comment next to each enumerator gives the attribute value as text.
///
/// @ref NYT::TRichYPath
enum ESchemaModificationAttr : i8
{
    /// No schema modification.
    SM_NONE_ATTR            /* "none" */,

    /// "unversioned_update" schema modification.
    SM_UNVERSIONED_UPDATE   /* "unversioned_update" */,
};
  420. ////////////////////////////////////////////////////////////////////////////////
///
/// @brief Table key column description.
///
/// The description includes column name and sort order.
///
/// @anchor TSortOrder_backward_compatibility
/// @note
/// Many functions that use `TSortColumn` as argument used to take `TString`
/// (the only allowed sort order was "ascending" and user didn't have to specify it).
/// @note
/// This class is designed to provide backward compatibility for such code and therefore
/// objects of this class can be constructed and assigned from TString-like objects only.
///
/// @see NYT::TSortOperationSpec
class TSortColumn
{
public:
    /// @cond Doxygen_Suppress
    using TSelf = TSortColumn;
    /// @endcond

    /// Column name
    FLUENT_FIELD_ENCAPSULATED(TString, Name);

    /// Sort order (ascending unless set explicitly)
    FLUENT_FIELD_DEFAULT_ENCAPSULATED(ESortOrder, SortOrder, ESortOrder::SO_ASCENDING);

    ///
    /// @{
    ///
    /// @brief Construct object from name and sort order
    ///
    /// Constructors are intentionally implicit so `TSortColumn` can be compatible with old code.
    /// @ref TSortOrder_backward_compatibility
    TSortColumn(TStringBuf name = {}, ESortOrder sortOrder = ESortOrder::SO_ASCENDING);
    TSortColumn(const TString& name, ESortOrder sortOrder = ESortOrder::SO_ASCENDING);
    TSortColumn(const char* name, ESortOrder sortOrder = ESortOrder::SO_ASCENDING);
    /// @}

    /// Check that sort order is ascending, throw exception otherwise.
    const TSortColumn& EnsureAscending() const;

    /// @brief Convert sort to yson representation as YT API expects it.
    TNode ToNode() const;

    /// @brief Comparison is default and checks both name and sort order.
    bool operator == (const TSortColumn& rhs) const = default;

    ///
    /// @{
    ///
    /// @brief Assign object from column name, and set sort order to `ascending`.
    ///
    /// These are backward compatibility methods.
    ///
    /// @ref TSortOrder_backward_compatibility
    TSortColumn& operator = (TStringBuf name);
    TSortColumn& operator = (const TString& name);
    TSortColumn& operator = (const char* name);
    /// @}

    ///
    /// @{
    ///
    /// @brief Compare with a column name.
    ///
    /// NOTE(review): definitions are not in this header; presumably these exist for
    /// backward compatibility with code that treated sort columns as strings — confirm
    /// how a non-ascending column compares.
    bool operator == (const TStringBuf rhsName) const;
    bool operator == (const TString& rhsName) const;
    bool operator == (const char* rhsName) const;
    /// @}

    // Intentionally implicit conversions.
    operator TString() const;
    operator TStringBuf() const;
    operator std::string() const;

    // Serializes name and sort order, in this order.
    Y_SAVELOAD_DEFINE(Name_, SortOrder_);
};
///
/// @brief List of @ref TSortColumn
///
/// Contains a bunch of helper methods such as constructing from single object.
class TSortColumns
    : public TOneOrMany<TSortColumn, TSortColumns>
{
public:
    // Inherit all constructors of the base sequence class.
    using TOneOrMany<TSortColumn, TSortColumns>::TOneOrMany;

    /// Construct empty list.
    TSortColumns();

    ///
    /// @{
    ///
    /// @brief Construct list of ascending sort order columns by their names.
    ///
    /// Required for backward compatibility.
    ///
    /// @ref TSortOrder_backward_compatibility
    TSortColumns(const TVector<TString>& names);
    TSortColumns(const TColumnNames& names);
    /// @}

    ///
    /// @brief Implicit conversion to column list.
    ///
    /// If all columns have ascending sort order, return list of their names.
    /// Throw exception otherwise.
    ///
    /// Required for backward compatibility.
    ///
    /// @ref TSortOrder_backward_compatibility
    operator TColumnNames() const;

    /// Make sure that all columns are of ascending sort order.
    const TSortColumns& EnsureAscending() const;

    /// Get list of column names.
    TVector<TString> GetNames() const;
};
  520. ////////////////////////////////////////////////////////////////////////////////
///
/// @brief Helper function to create new style type from old style one.
///
/// @param type Old style value type.
/// @param required Requiredness of the value.
///     NOTE(review): presumably a non-required type is wrapped into an optional — confirm against the implementation.
/// @return Type description from type_info library.
NTi::TTypePtr ToTypeV3(EValueType type, bool required);
  523. ///
  524. /// @brief Single column description
  525. ///
  526. /// Each field describing column has setter and getter.
  527. ///
  528. /// Example reading field:
  529. /// ```
  530. /// ... columnSchema.Name() ...
  531. /// ```
  532. ///
  533. /// Example setting field:
  534. /// ```
  535. /// columnSchema.Name("my-column").Type(VT_INT64); // set name and type
  536. /// ```
  537. ///
  538. /// @ref https://ytsaurus.tech/docs/en/user-guide/storage/static-schema
  539. class TColumnSchema
  540. {
  541. public:
  542. /// @cond Doxygen_Suppress
  543. using TSelf = TColumnSchema;
  544. /// @endcond
  545. ///
  546. /// @brief Construct empty column schemas
  547. ///
  548. /// @note
  549. /// Such schema cannot be used in schema as it it doesn't have name.
  550. TColumnSchema();
  551. ///
  552. /// @{
  553. ///
  554. /// @brief Copy and move constructors are default.
  555. TColumnSchema(const TColumnSchema&) = default;
  556. TColumnSchema& operator=(const TColumnSchema&) = default;
  557. /// @}
  558. FLUENT_FIELD_ENCAPSULATED(TString, Name);
  559. ///
  560. /// @brief Functions to work with type in old manner.
  561. ///
  562. /// @deprecated New code is recommended to work with types using @ref NTi::TTypePtr from type_info library.
  563. TColumnSchema& Type(EValueType type) &;
  564. TColumnSchema Type(EValueType type) &&;
  565. EValueType Type() const;
  566. /// @brief Set and get column type.
  567. /// @{
  568. TColumnSchema& Type(const NTi::TTypePtr& type) &;
  569. TColumnSchema Type(const NTi::TTypePtr& type) &&;
  570. TColumnSchema& TypeV3(const NTi::TTypePtr& type) &;
  571. TColumnSchema TypeV3(const NTi::TTypePtr& type) &&;
  572. NTi::TTypePtr TypeV3() const;
  573. /// @}
  574. ///
  575. /// @brief Raw yson representation of column type
  576. /// @deprecated Prefer to use `TypeV3` methods.
  577. FLUENT_FIELD_OPTION_ENCAPSULATED(TNode, RawTypeV3);
  578. /// Column sort order
  579. FLUENT_FIELD_OPTION_ENCAPSULATED(ESortOrder, SortOrder);
  580. ///
  581. /// @brief Lock group name
  582. ///
  583. /// @ref https://ytsaurus.tech/docs/en/user-guide/dynamic-tables/sorted-dynamic-tables#locking-rows
  584. FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Lock);
  585. /// Expression defining column value
  586. FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Expression);
  587. /// Aggregating function name
  588. FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Aggregate);
  589. ///
  590. /// @brief Storage group name
  591. ///
  592. /// @ref https://ytsaurus.tech/docs/en/user-guide/storage/static-schema
  593. FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Group);
  594. // StableName for renamed and deleted columns.
  595. FLUENT_FIELD_OPTION_ENCAPSULATED(TString, StableName);
  596. /// Deleted column
  597. FLUENT_FIELD_OPTION_ENCAPSULATED(bool, Deleted);
  598. ///
  599. /// @brief Column requiredness.
  600. ///
  601. /// Required columns doesn't accept NULL values.
  602. /// Usually if column is required it means that it has Optional<...> type
  603. bool Required() const;
  604. ///
  605. /// @{
  606. ///
  607. /// @brief Set type in old-style manner
  608. TColumnSchema& Type(EValueType type, bool required) &;
  609. TColumnSchema Type(EValueType type, bool required) &&;
  610. /// @}
  611. private:
  612. friend void Deserialize(TColumnSchema& columnSchema, const TNode& node);
  613. NTi::TTypePtr TypeV3_;
  614. bool Required_ = false;
  615. };
/// Equality check compares all fields of column schema.
bool operator==(const TColumnSchema& lhs, const TColumnSchema& rhs);
///
/// @brief Description of table schema
///
/// @see https://ytsaurus.tech/docs/en/user-guide/storage/static-schema
class TTableSchema
{
public:
    /// @cond Doxygen_Suppress
    using TSelf = TTableSchema;
    /// @endcond

    /// Column schema
    FLUENT_VECTOR_FIELD_ENCAPSULATED(TColumnSchema, Column);

    ///
    /// @brief Strictness of the schema
    ///
    /// Strict schemas are not allowed to have columns not described in schema.
    /// Nonstrict schemas are allowed to have such columns, all such missing columns are assumed to have
    /// type any (or optional<yson> in type_v3 terminology).
    ///
    /// Schema is strict by default.
    FLUENT_FIELD_DEFAULT_ENCAPSULATED(bool, Strict, true);

    ///
    /// @brief Whether keys are unique
    ///
    /// This flag can be set only for schemas that have sorted columns.
    /// If flag is set table cannot have multiple rows with same key.
    ///
    /// Flag is not set by default.
    FLUENT_FIELD_DEFAULT_ENCAPSULATED(bool, UniqueKeys, false);

    /// Get modifiable column list
    TVector<TColumnSchema>& MutableColumns();

    /// Check if schema has any described column
    [[nodiscard]] bool Empty() const;

    /// Add column
    TTableSchema& AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) &;

    /// @copydoc NYT::TTableSchema::AddColumn(const TString&, const NTi::TTypePtr&, ESortOrder)&;
    TTableSchema AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) &&;

    /// @copydoc NYT::TTableSchema::AddColumn(const TString&, const NTi::TTypePtr&, ESortOrder)&;
    TTableSchema& AddColumn(const TString& name, const NTi::TTypePtr& type) &;

    /// @copydoc NYT::TTableSchema::AddColumn(const TString&, const NTi::TTypePtr&, ESortOrder)&;
    TTableSchema AddColumn(const TString& name, const NTi::TTypePtr& type) &&;

    /// Add optional column of specified type
    TTableSchema& AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) &;

    /// @copydoc NYT::TTableSchema::AddColumn(const TString&, EValueType, ESortOrder)&;
    TTableSchema AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) &&;

    /// @copydoc NYT::TTableSchema::AddColumn(const TString&, EValueType, ESortOrder)&;
    TTableSchema& AddColumn(const TString& name, EValueType type) &;

    /// @copydoc NYT::TTableSchema::AddColumn(const TString&, EValueType, ESortOrder)&;
    TTableSchema AddColumn(const TString& name, EValueType type) &&;

    ///
    /// @brief Make table schema sorted by specified columns
    ///
    /// Resets old key columns if any
    TTableSchema& SortBy(const TSortColumns& columns) &;

    /// @copydoc NYT::TTableSchema::SortBy(const TSortColumns&)&;
    TTableSchema SortBy(const TSortColumns& columns) &&;

    /// Get yson description of table schema
    [[nodiscard]] TNode ToNode() const;

    /// Parse schema from yson node
    static NYT::TTableSchema FromNode(const TNode& node);

    // Deserialization fills private fields directly.
    friend void Deserialize(TTableSchema& tableSchema, const TNode& node);
};
/// Check for equality of all columns and all schema attributes
bool operator==(const TTableSchema& lhs, const TTableSchema& rhs);
// Pretty printer for unittests: writes description of `schema` to `out`.
void PrintTo(const TTableSchema& schema, std::ostream* out);
///
/// @brief Create table schema by protobuf message descriptor
///
/// @param messageDescriptor Descriptor of the protobuf message describing table row.
/// @param sortColumns Columns the schema is sorted by; empty by default.
/// @param keepFieldsWithoutExtension Enabled by default.
///     NOTE(review): presumably controls whether message fields that carry no YT-specific
///     extension still become columns — confirm against the implementation.
TTableSchema CreateTableSchema(
    const ::google::protobuf::Descriptor& messageDescriptor,
    const TSortColumns& sortColumns = TSortColumns(),
    bool keepFieldsWithoutExtension = true);
  685. /// Create table schema by protobuf message type
  686. template <class TProtoType, typename = std::enable_if_t<std::is_base_of_v<::google::protobuf::Message, TProtoType>>>
  687. inline TTableSchema CreateTableSchema(
  688. const TSortColumns& sortColumns = TSortColumns(),
  689. bool keepFieldsWithoutExtension = true)
  690. {
  691. static_assert(
  692. std::is_base_of_v<::google::protobuf::Message, TProtoType>,
  693. "Template argument must be derived from ::google::protobuf::Message");
  694. return CreateTableSchema(
  695. *TProtoType::descriptor(),
  696. sortColumns,
  697. keepFieldsWithoutExtension);
  698. }
///
/// @brief Create strict table schema from `struct` type.
///
/// Names and types of columns are taken from struct member names and types.
/// `Strict` flag is set to true, all other attributes of schema and columns
/// are left with default values.
///
/// @param type Type description; expected to be a `struct` type.
TTableSchema CreateTableSchema(NTi::TTypePtr type);
  706. ////////////////////////////////////////////////////////////////////////////////
///
/// @brief Enumeration describing comparison operation used in key bound.
///
/// ERelation is a part of @ref NYT::TKeyBound that can be used as
/// lower or upper key limit in @ref TReadLimit.
///
/// Relations `Less` and `LessOrEqual` are for upper limit and
/// relations `Greater` and `GreaterOrEqual` are for lower limit.
///
/// It is an error to use relation in the limit of wrong kind.
///
/// The quoted comment next to each enumerator gives its textual form.
///
/// @see https://ytsaurus.tech/docs/en/user-guide/storage/ypath#rich_ypath
enum class ERelation
{
    ///
    /// @brief Relation "less"
    ///
    /// Specifies range of keys that are before specified key.
    /// Can only be used in upper limit.
    Less /* "<" */,

    ///
    /// @brief Relation "less or equal"
    ///
    /// Specifies range of keys that are before or equal to specified key.
    /// Can only be used in upper limit.
    LessOrEqual /* "<=" */,

    ///
    /// @brief Relation "greater"
    ///
    /// Specifies range of keys that are after specified key.
    /// Can only be used in lower limit.
    Greater /* ">" */,

    ///
    /// @brief Relation "greater or equal"
    ///
    /// Specifies range of keys that are after or equal to specified key.
    /// Can only be used in lower limit.
    GreaterOrEqual /* ">=" */,
};
///
/// @brief Key with relation specifying interval of keys in lower or upper limit of @ref NYT::TReadRange
///
/// Holds two encapsulated fields: `Relation` (defaults to `ERelation::Less`)
/// and `Key` (defaults to empty `TKey`).
///
/// @see https://ytsaurus.tech/docs/en/user-guide/common/ypath#rich_ypath
struct TKeyBound
{
    /// @cond Doxygen_Suppress
    using TSelf = TKeyBound;

    // Constructor is explicit; its defaults mirror the field defaults below.
    explicit TKeyBound(ERelation relation = ERelation::Less, TKey key = TKey{});

    // Comparison operation of the bound.
    FLUENT_FIELD_DEFAULT_ENCAPSULATED(ERelation, Relation, ERelation::Less);
    // Key the relation is applied to.
    FLUENT_FIELD_DEFAULT_ENCAPSULATED(TKey, Key, TKey{});
    /// @endcond
};
///
/// @brief Description of the read limit.
///
/// It is actually a variant and must store exactly one field.
/// Each field is optional and is not set by default.
///
/// @see https://ytsaurus.tech/docs/en/user-guide/common/ypath#rich_ypath
struct TReadLimit
{
    /// @cond Doxygen_Suppress
    using TSelf = TReadLimit;
    /// @endcond

    ///
    /// @brief KeyBound specifies table key and whether to include it
    ///
    /// It can be used in lower or upper limit when reading tables.
    FLUENT_FIELD_OPTION(TKeyBound, KeyBound);

    ///
    /// @brief Table key
    ///
    /// It can be used in exact, lower or upper limit when reading tables.
    FLUENT_FIELD_OPTION(TKey, Key);

    ///
    /// @brief Row index
    ///
    /// It can be used in exact, lower or upper limit when reading tables.
    FLUENT_FIELD_OPTION(i64, RowIndex);

    ///
    /// @brief File offset
    ///
    /// It can be used in lower or upper limit when reading files.
    FLUENT_FIELD_OPTION(i64, Offset);

    ///
    /// @brief Tablet index
    ///
    /// It can be used in lower or upper limit in dynamic table operations
    FLUENT_FIELD_OPTION(i64, TabletIndex);
};
  796. ///
  797. /// @brief Range of a table or a file
  798. ///
  799. /// @see https://ytsaurus.tech/docs/en/user-guide/common/ypath#rich_ypath
  800. struct TReadRange
  801. {
  802. using TSelf = TReadRange;
  803. ///
  804. /// @brief Lower limit of the range
  805. ///
  806. /// It is usually inclusive (except when @ref NYT::TKeyBound with relation @ref NYT::ERelation::Greater is used).
  807. FLUENT_FIELD(TReadLimit, LowerLimit);
  808. ///
  809. /// @brief Lower limit of the range
  810. ///
  811. /// It is usually exclusive (except when @ref NYT::TKeyBound with relation @ref NYT::ERelation::LessOrEqual is used).
  812. FLUENT_FIELD(TReadLimit, UpperLimit);
  813. /// Exact key or row index.
  814. FLUENT_FIELD(TReadLimit, Exact);
  815. /// Create read range from row indexes.
  816. static TReadRange FromRowIndices(i64 lowerLimit, i64 upperLimit)
  817. {
  818. return TReadRange()
  819. .LowerLimit(TReadLimit().RowIndex(lowerLimit))
  820. .UpperLimit(TReadLimit().RowIndex(upperLimit));
  821. }
  822. /// Create read range from keys.
  823. static TReadRange FromKeys(const TKey& lowerKeyInclusive, const TKey& upperKeyExclusive)
  824. {
  825. return TReadRange()
  826. .LowerLimit(TReadLimit().Key(lowerKeyInclusive))
  827. .UpperLimit(TReadLimit().Key(upperKeyExclusive));
  828. }
  829. };
  830. ///
  831. /// @brief Path with additional attributes.
  832. ///
  833. /// Allows to specify additional attributes for path used in some operations.
  834. ///
  835. /// @see https://ytsaurus.tech/docs/en/user-guide/storage/ypath#rich_ypath
  836. struct TRichYPath
  837. {
  838. /// @cond Doxygen_Suppress
  839. using TSelf = TRichYPath;
  840. /// @endcond
  841. /// Path itself.
  842. FLUENT_FIELD(TYPath, Path);
  843. /// Specifies that path should be appended not overwritten
  844. FLUENT_FIELD_OPTION(bool, Append);
  845. /// @deprecated Deprecated attribute.
  846. FLUENT_FIELD_OPTION(bool, PartiallySorted);
  847. /// Specifies that path is expected to be sorted by these columns.
  848. FLUENT_FIELD(TSortColumns, SortedBy);
  849. /// Add range to read.
  850. TRichYPath& AddRange(TReadRange range)
  851. {
  852. if (!Ranges_) {
  853. Ranges_.ConstructInPlace();
  854. }
  855. Ranges_->push_back(std::move(range));
  856. return *this;
  857. }
  858. TRichYPath& ResetRanges()
  859. {
  860. Ranges_.Clear();
  861. return *this;
  862. }
  863. ///
  864. /// @{
  865. ///
  866. /// Return ranges to read.
  867. ///
  868. /// NOTE: Nothing (in TMaybe) and empty TVector are different ranges.
  869. /// Nothing represents universal range (reader reads all table rows).
  870. /// Empty TVector represents empty range (reader returns empty set of rows).
  871. const TMaybe<TVector<TReadRange>>& GetRanges() const
  872. {
  873. return Ranges_;
  874. }
  875. TMaybe<TVector<TReadRange>>& MutableRanges()
  876. {
  877. return Ranges_;
  878. }
  879. ///
  880. /// @{
  881. ///
  882. /// Get range view, that is convenient way to iterate through all ranges.
  883. TArrayRef<TReadRange> MutableRangesView()
  884. {
  885. if (Ranges_.Defined()) {
  886. return TArrayRef(Ranges_->data(), Ranges_->size());
  887. } else {
  888. return {};
  889. }
  890. }
  891. TArrayRef<const TReadRange> GetRangesView() const
  892. {
  893. if (Ranges_.Defined()) {
  894. return TArrayRef(Ranges_->data(), Ranges_->size());
  895. } else {
  896. return {};
  897. }
  898. }
  899. /// @}
  900. /// @{
  901. ///
  902. /// Get range by index.
  903. const TReadRange& GetRange(ssize_t i) const
  904. {
  905. return Ranges_.GetRef()[i];
  906. }
  907. TReadRange& MutableRange(ssize_t i)
  908. {
  909. return Ranges_.GetRef()[i];
  910. }
  911. /// @}
  912. ///
  913. /// @brief Specifies columns that should be read.
  914. ///
  915. /// If it's set to Nothing then all columns will be read.
  916. /// If empty TColumnNames is specified then each read row will be empty.
  917. FLUENT_FIELD_OPTION(TColumnNames, Columns);
  918. FLUENT_FIELD_OPTION(bool, Teleport);
  919. FLUENT_FIELD_OPTION(bool, Primary);
  920. FLUENT_FIELD_OPTION(bool, Foreign);
  921. FLUENT_FIELD_OPTION(i64, RowCountLimit);
  922. FLUENT_FIELD_OPTION(TString, FileName);
  923. /// Specifies original path to be shown in Web UI
  924. FLUENT_FIELD_OPTION(TYPath, OriginalPath);
  925. ///
  926. /// @brief Specifies that this path points to executable file
  927. ///
  928. /// Used in operation specs.
  929. FLUENT_FIELD_OPTION(bool, Executable);
  930. ///
  931. /// @brief Specify format to use when loading table.
  932. ///
  933. /// Used in operation specs.
  934. FLUENT_FIELD_OPTION(TNode, Format);
  935. /// @brief Specifies table schema that will be set on the path
  936. FLUENT_FIELD_OPTION(TTableSchema, Schema);
  937. /// Specifies compression codec that will be set on the path
  938. FLUENT_FIELD_OPTION(TString, CompressionCodec);
  939. /// Specifies erasure codec that will be set on the path
  940. FLUENT_FIELD_OPTION(EErasureCodecAttr, ErasureCodec);
  941. /// Specifies schema modification that will be set on the path
  942. FLUENT_FIELD_OPTION(ESchemaModificationAttr, SchemaModification);
  943. /// Specifies optimize_for attribute that will be set on the path
  944. FLUENT_FIELD_OPTION(EOptimizeForAttr, OptimizeFor);
  945. ///
  946. /// @brief Do not put file used in operation into node cache
  947. ///
  948. /// If BypassArtifactCache == true, file will be loaded into the job's sandbox bypassing the cache on the YT node.
  949. /// It helps jobs that use tmpfs to start faster,
  950. /// because files will be loaded into tmpfs directly bypassing disk cache
  951. FLUENT_FIELD_OPTION(bool, BypassArtifactCache);
  952. ///
  953. /// @brief Timestamp of dynamic table.
  954. ///
  955. /// NOTE: it is _not_ unix timestamp
  956. /// (instead it's transaction timestamp, that is more complex structure).
  957. FLUENT_FIELD_OPTION(i64, Timestamp);
  958. ///
  959. /// @brief Specify transaction that should be used to access this path.
  960. ///
  961. /// Allows to start cross-transactional operations.
  962. FLUENT_FIELD_OPTION(TTransactionId, TransactionId);
  963. using TRenameColumnsDescriptor = THashMap<TString, TString>;
  964. /// Specifies columnar mapping which will be applied to columns before transfer to job.
  965. FLUENT_FIELD_OPTION(TRenameColumnsDescriptor, RenameColumns);
  966. /// Create empty path with no attributes
  967. TRichYPath()
  968. { }
  969. ///
  970. /// @{
  971. ///
  972. /// @brief Create path from string
  973. TRichYPath(const char* path)
  974. : Path_(path)
  975. { }
  976. TRichYPath(const TYPath& path)
  977. : Path_(path)
  978. { }
  979. /// @}
  980. private:
  981. TMaybe<TVector<TReadRange>> Ranges_;
  982. };
  983. ///
  984. /// @ref Create copy of @ref NYT::TRichYPath with schema derived from proto message.
  985. ///
  986. ///
  987. template <typename TProtoType>
  988. TRichYPath WithSchema(const TRichYPath& path, const TSortColumns& sortBy = TSortColumns())
  989. {
  990. static_assert(std::is_base_of_v<::google::protobuf::Message, TProtoType>, "TProtoType must be Protobuf message");
  991. auto schemedPath = path;
  992. if (!schemedPath.Schema_) {
  993. schemedPath.Schema(CreateTableSchema<TProtoType>(sortBy));
  994. }
  995. return schemedPath;
  996. }
  997. ///
  998. /// @brief Create copy of @ref NYT::TRichYPath with schema derived from TRowType if possible.
  999. ///
  1000. /// If TRowType is protobuf message schema is derived from it and set to returned path.
  1001. /// Otherwise schema of original path is left unchanged (and probably unset).
  1002. template <typename TRowType>
  1003. TRichYPath MaybeWithSchema(const TRichYPath& path, const TSortColumns& sortBy = TSortColumns())
  1004. {
  1005. if constexpr (std::is_base_of_v<::google::protobuf::Message, TRowType>) {
  1006. return WithSchema<TRowType>(path, sortBy);
  1007. } else {
  1008. return path;
  1009. }
  1010. }
///
/// @brief Get the list of ranges related to path in compatibility mode.
///
/// - If path is missing ranges, empty list is returned.
/// - If path has associated range list and the list is not empty, function returns this list.
/// - If path has associated range list and this list is empty, exception is thrown.
///
/// Before YT-17683 RichYPath didn't support empty range list and empty range actually meant universal range.
/// This function emulates this old behavior.
///
/// @param path Rich path whose ranges are inspected.
/// @return Reference to the range list selected according to the rules above.
///
/// NOTE(review): the result is returned by reference; its lifetime relative to `path`
/// is not visible from this declaration -- check the implementation before storing it.
///
/// @see https://st.yandex-team.ru/YT-17683
const TVector<TReadRange>& GetRangesCompat(const TRichYPath& path);
  1023. ////////////////////////////////////////////////////////////////////////////////
/// Statistics about table columns.
///
/// Plain data holder: the maps are keyed by column name, the scalar counter defaults to zero
/// and the timestamp weight is optional.
struct TTableColumnarStatistics
{
    /// Total data weight for all chunks for each of requested columns.
    THashMap<TString, i64> ColumnDataWeight;

    /// Estimated number of unique elements for each column.
    THashMap<TString, ui64> ColumnEstimatedUniqueCounts;

    /// Total weight of all old chunks that don't keep columnar statistics.
    i64 LegacyChunksDataWeight = 0;

    /// Timestamps total weight (only for dynamic tables).
    TMaybe<i64> TimestampTotalWeight;
};
  1036. ////////////////////////////////////////////////////////////////////////////////
/// Description of a partition.
struct TMultiTablePartition
{
    /// Numeric summary of a partition; every counter defaults to zero.
    struct TStatistics
    {
        /// Chunk count (presumably the number of chunks covered by the partition -- TODO confirm).
        i64 ChunkCount = 0;

        /// Data weight (presumably the total data weight of the partition -- TODO confirm).
        i64 DataWeight = 0;

        /// Row count (presumably the total number of rows in the partition -- TODO confirm).
        i64 RowCount = 0;
    };

    /// Ranges of input tables for this partition.
    TVector<TRichYPath> TableRanges;

    /// Aggregate statistics of all the table ranges in the partition.
    TStatistics AggregateStatistics;
};
/// Table partitions from GetTablePartitions command.
///
/// Thin wrapper around the list of @ref NYT::TMultiTablePartition objects.
struct TMultiTablePartitions
{
    /// Disjoint partitions into which the input tables were divided.
    TVector<TMultiTablePartition> Partitions;
};
  1057. ////////////////////////////////////////////////////////////////////////////////
  1058. ///
  1059. /// @brief Contains information about tablet
  1060. ///
  1061. /// @see NYT::IClient::GetTabletInfos
  1062. struct TTabletInfo
  1063. {
  1064. ///
  1065. /// @brief Indicates the total number of rows added to the tablet (including trimmed ones).
  1066. ///
  1067. /// Currently only provided for ordered tablets.
  1068. i64 TotalRowCount = 0;
  1069. ///
  1070. /// @brief Contains the number of front rows that are trimmed and are not guaranteed to be accessible.
  1071. ///
  1072. /// Only makes sense for ordered tablet.
  1073. i64 TrimmedRowCount = 0;
  1074. ///
  1075. /// @brief Tablet cell barrier timestamp, which lags behind the current timestamp
  1076. ///
  1077. /// It is guaranteed that all transactions with commit timestamp not exceeding the barrier are fully committed;
  1078. /// e.g. all their added rows are visible (and are included in @ref NYT::TTabletInfo::TotalRowCount).
  1079. /// Mostly makes sense for ordered tablets.
  1080. ui64 BarrierTimestamp;
  1081. };
  1082. ////////////////////////////////////////////////////////////////////////////////
/// List of attributes to retrieve in operations like @ref NYT::ICypressClient::Get
struct TAttributeFilter
{
    /// @cond Doxygen_Suppress
    using TSelf = TAttributeFilter;
    /// @endcond

    /// List of attributes.
    ///
    /// NOTE(review): FLUENT_VECTOR_FIELD is defined earlier in this header (outside this chunk);
    /// presumably it declares a vector of attribute names plus a chainable adder -- TODO confirm.
    FLUENT_VECTOR_FIELD(TString, Attribute);
};
  1092. ////////////////////////////////////////////////////////////////////////////////
///
/// @brief Check if none of the fields of @ref NYT::TReadLimit is set.
///
/// @param readLimit Read limit to inspect.
/// @return true if no field of readLimit is set and false otherwise.
bool IsTrivial(const TReadLimit& readLimit);
///
/// @brief Convert yson node type to table schema type
///
/// @param nodeType Type of a yson node.
/// @return Table schema value type matching `nodeType`.
///
/// NOTE(review): behavior for node types that have no schema counterpart is not visible
/// from this declaration -- check the implementation.
EValueType NodeTypeToValueType(TNode::EType nodeType);
  1100. ////////////////////////////////////////////////////////////////////////////////
///
/// @brief Enumeration for specifying how reading from master is performed.
///
/// Used in operations like NYT::ICypressClient::Get
///
/// NOTE(review): the quoted names in the inline comments next to each enumerator look like
/// serialized names (presumably consumed by enum serialization tooling) -- keep them verbatim.
enum class EMasterReadKind : int
{
    ///
    /// @brief Reading from leader.
    ///
    /// Should almost never be used since it's expensive and for regular uses has no difference from
    /// "follower" read.
    Leader /* "leader" */,

    /// @brief Reading from master follower (default).
    Follower /* "follower" */,

    /// @brief Reading through cache.
    ///
    /// NOTE(review): exact caching semantics are not described in this header.
    Cache /* "cache" */,

    /// @brief Reading through master cache.
    ///
    /// NOTE(review): how this differs from `Cache` is not described in this header.
    MasterCache /* "master_cache" */,
};
  1118. ////////////////////////////////////////////////////////////////////////////////
/// @cond Doxygen_Suppress
namespace NDetail {

// MUST NOT BE USED BY CLIENTS
// TODO: we should use default GENERATE_ENUM_SERIALIZATION
// Internal helper producing a string for a value type; the exact output format is defined
// by the implementation, which is not part of this header.
TString ToString(EValueType type);

} // namespace NDetail
/// @endcond
  1126. ////////////////////////////////////////////////////////////////////////////////
  1127. } // namespace NYT