structured_table_formats.h 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. #pragma once
  2. #include <yt/cpp/mapreduce/interface/fwd.h>
  3. #include <yt/cpp/mapreduce/interface/config.h>
  4. #include <yt/cpp/mapreduce/interface/operation.h>
  5. #include <yt/cpp/mapreduce/common/fwd.h>
  6. #include <yt/cpp/mapreduce/http/context.h>
  7. #include <yt/cpp/mapreduce/http/requests.h>
  8. #include <utility>
  9. namespace NYT {
  10. ////////////////////////////////////////////////////////////////////////////////
  11. TMaybe<TNode> GetCommonTableFormat(
  12. const TVector<TMaybe<TNode>>& formats);
  13. TMaybe<TNode> GetTableFormat(
  14. const IClientRetryPolicyPtr& clientRetryPolicy,
  15. const IRawClientPtr& rawClient,
  16. const TTransactionId& transactionId,
  17. const TRichYPath& path);
  18. TMaybe<TNode> GetTableFormats(
  19. const IClientRetryPolicyPtr& clientRetryPolicy,
  20. const IRawClientPtr& rawClient,
  21. const TTransactionId& transactionId,
  22. const TVector<TRichYPath>& paths);
  23. ////////////////////////////////////////////////////////////////////////////////
  24. namespace NDetail {
  25. ////////////////////////////////////////////////////////////////////////////////
  // Tells format-building code which side of a job it is dealing with:
  // the job's input or the job's output.
  enum class EIODirection
  {
      Input,
      Output
  };
  31. ////////////////////////////////////////////////////////////////////////////////
  32. struct TSmallJobFile
  33. {
  34. TString FileName;
  35. TString Data;
  36. };
  37. ////////////////////////////////////////////////////////////////////////////////
  38. // Table that is used while preparing operation formats. Can be real table or intermediate
  39. struct TStructuredJobTable
  40. {
  41. TTableStructure Description;
  42. // Might be null for intermediate tables in MapReduce operation
  43. TMaybe<TRichYPath> RichYPath;
  44. static TStructuredJobTable Intermediate(TTableStructure description)
  45. {
  46. return TStructuredJobTable{std::move(description), Nothing()};
  47. }
  48. };
  49. using TStructuredJobTableList = TVector<TStructuredJobTable>;
  50. TString JobTablePathString(const TStructuredJobTable& jobTable);
  51. TStructuredJobTableList ToStructuredJobTableList(const TVector<TStructuredTablePath>& tableList);
  52. TStructuredJobTableList CanonizeStructuredTableList(const IRawClientPtr& rawClient, const TVector<TStructuredTablePath>& tableList);
  53. TVector<TRichYPath> GetPathList(
  54. const TStructuredJobTableList& tableList,
  55. const TMaybe<TVector<TTableSchema>>& schemaInferenceResult,
  56. bool inferSchema);
  57. ////////////////////////////////////////////////////////////////////////////////
  58. class TFormatBuilder
  59. {
  60. private:
  61. struct TFormatSwitcher;
  62. public:
  63. TFormatBuilder(
  64. IRawClientPtr rawClient,
  65. IClientRetryPolicyPtr clientRetryPolicy,
  66. TClientContext context,
  67. TTransactionId transactionId,
  68. TOperationOptions operationOptions);
  69. std::pair<TFormat, TMaybe<TSmallJobFile>> CreateFormat(
  70. const IStructuredJob& job,
  71. const EIODirection& direction,
  72. const TStructuredJobTableList& structuredTableList,
  73. const TMaybe<TFormatHints>& formatHints,
  74. ENodeReaderFormat nodeReaderFormat,
  75. bool allowFormatFromTableAttribute);
  76. std::pair<TFormat, TMaybe<TSmallJobFile>> CreateVoidFormat(
  77. const IStructuredJob& job,
  78. const EIODirection& direction,
  79. const TStructuredJobTableList& structuredTableList,
  80. const TMaybe<TFormatHints>& formatHints,
  81. ENodeReaderFormat nodeReaderFormat,
  82. bool allowFormatFromTableAttribute);
  83. std::pair<TFormat, TMaybe<TSmallJobFile>> CreateYamrFormat(
  84. const IStructuredJob& job,
  85. const EIODirection& direction,
  86. const TStructuredJobTableList& structuredTableList,
  87. const TMaybe<TFormatHints>& formatHints,
  88. ENodeReaderFormat nodeReaderFormat,
  89. bool allowFormatFromTableAttribute);
  90. std::pair<TFormat, TMaybe<TSmallJobFile>> CreateNodeFormat(
  91. const IStructuredJob& job,
  92. const EIODirection& direction,
  93. const TStructuredJobTableList& structuredTableList,
  94. const TMaybe<TFormatHints>& formatHints,
  95. ENodeReaderFormat nodeReaderFormat,
  96. bool allowFormatFromTableAttribute);
  97. std::pair<TFormat, TMaybe<TSmallJobFile>> CreateProtobufFormat(
  98. const IStructuredJob& job,
  99. const EIODirection& direction,
  100. const TStructuredJobTableList& structuredTableList,
  101. const TMaybe<TFormatHints>& formatHints,
  102. ENodeReaderFormat nodeReaderFormat,
  103. bool allowFormatFromTableAttribute);
  104. private:
  105. const IRawClientPtr RawClient_;
  106. const IClientRetryPolicyPtr ClientRetryPolicy_;
  107. const TClientContext Context_;
  108. const TTransactionId TransactionId_;
  109. const TOperationOptions OperationOptions_;
  110. };
  111. ////////////////////////////////////////////////////////////////////////////////
  112. TMaybe<TTableSchema> GetTableSchema(const TTableStructure& tableStructure);
  113. ////////////////////////////////////////////////////////////////////////////////
  114. } // namespace NDetail
  115. } // namespace NYT