structured_table_formats.h 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
  1. #pragma once
  2. #include <yt/cpp/mapreduce/interface/fwd.h>
  3. #include <yt/cpp/mapreduce/interface/config.h>
  4. #include <yt/cpp/mapreduce/interface/operation.h>
  5. #include <yt/cpp/mapreduce/common/fwd.h>
  6. #include <yt/cpp/mapreduce/http/context.h>
  7. #include <yt/cpp/mapreduce/http/requests.h>
  8. #include <utility>
  9. namespace NYT {
  10. ////////////////////////////////////////////////////////////////////////////////
  // Declaration only; the definition is not part of this header.
  // Accepts a list of optional format nodes and returns a single optional format node.
  // NOTE(review): judging by the name, presumably yields the one format shared by all
  // entries -- confirm in the implementation how missing or conflicting entries are handled.
  11. TMaybe<TNode> GetCommonTableFormat(
  12. const TVector<TMaybe<TNode>>& formats);
  // Declaration only; the definition is not part of this header.
  // Returns an optional format node for the single table at `path`, given a retry policy,
  // a client context and a transaction id.
  // NOTE(review): presumably the format is fetched from the cluster (hence the retry policy
  // and transaction); where exactly it is read from is not visible here -- confirm.
  13. TMaybe<TNode> GetTableFormat(
  14. const IClientRetryPolicyPtr& clientRetryPolicy,
  15. const TClientContext& context,
  16. const TTransactionId& transactionId,
  17. const TRichYPath& path);
  // Declaration only; the definition is not part of this header.
  // Multi-table counterpart of GetTableFormat: takes a list of paths but still returns
  // a single optional format node.
  // NOTE(review): presumably combines the per-table formats into one common format
  // (cf. GetCommonTableFormat) -- confirm against the implementation.
  18. TMaybe<TNode> GetTableFormats(
  19. const IClientRetryPolicyPtr& clientRetryPolicy,
  20. const TClientContext& context,
  21. const TTransactionId& transactionId,
  22. const TVector<TRichYPath>& paths);
  23. ////////////////////////////////////////////////////////////////////////////////
  24. namespace NDetail {
  25. ////////////////////////////////////////////////////////////////////////////////
  // Selects one of two I/O directions. Passed as the `direction` argument to the
  // TFormatBuilder::Create*Format methods declared below.
  26. enum class EIODirection
  27. {
  28. Input,
  29. Output,
  30. };
  31. ////////////////////////////////////////////////////////////////////////////////
  // A file represented in memory as a name plus its contents. The TFormatBuilder::Create*Format
  // methods optionally return one alongside the format.
  // NOTE(review): presumably an auxiliary file that accompanies the job -- confirm with callers.
  32. struct TSmallJobFile
  33. {
  // Name of the file.
  34. TString FileName;
  // Contents of the file.
  35. TString Data;
  36. };
  37. ////////////////////////////////////////////////////////////////////////////////
  38. // Table used while preparing operation formats. Can be either a real table or an intermediate one.
  39. struct TStructuredJobTable
  40. {
  // Structure description of the table.
  41. TTableStructure Description;
  42. // Might be empty for intermediate tables in a MapReduce operation.
  43. TMaybe<TRichYPath> RichYPath;
  // Builds a table entry that has the given description and no path (RichYPath is Nothing()).
  // `description` is taken by value and moved into the result.
  44. static TStructuredJobTable Intermediate(TTableStructure description)
  45. {
  46. return TStructuredJobTable{std::move(description), Nothing()};
  47. }
  48. };
  // List of job tables; the type accepted by GetPathList and the TFormatBuilder methods.
  49. using TStructuredJobTableList = TVector<TStructuredJobTable>;
  // Declaration only. Produces a string for the given job table.
  // NOTE(review): presumably a printable path; what is returned when RichYPath is empty
  // is not visible here -- confirm.
  50. TString JobTablePathString(const TStructuredJobTable& jobTable);
  // Declaration only. Converts a list of TStructuredTablePath into a TStructuredJobTableList.
  51. TStructuredJobTableList ToStructuredJobTableList(const TVector<TStructuredTablePath>& tableList);
  // Declaration only. Like ToStructuredJobTableList, but additionally takes the client context.
  // NOTE(review): presumably canonizes each path using that context -- confirm.
  52. TStructuredJobTableList CanonizeStructuredTableList(const TClientContext& context, const TVector<TStructuredTablePath>& tableList);
  // Declaration only. Produces a list of rich paths from a job table list, an optional
  // schema-inference result and an `inferSchema` flag.
  // NOTE(review): presumably attaches inferred schemas to the paths when `inferSchema` is set;
  // the treatment of tables with an empty RichYPath is not visible here -- confirm.
  53. TVector<TRichYPath> GetPathList(
  54. const TStructuredJobTableList& tableList,
  55. const TMaybe<TVector<TTableSchema>>& schemaInferenceResult,
  56. bool inferSchema);
  57. ////////////////////////////////////////////////////////////////////////////////
  // Builds formats for structured jobs. Holds copies of the retry policy, client context,
  // transaction id and operation options supplied at construction.
  // Every Create*Format method takes the same argument list and returns the format paired
  // with an optional TSmallJobFile.
  58. class TFormatBuilder
  59. {
  60. private:
  // Nested helper type, defined outside this header.
  // NOTE(review): presumably used by CreateFormat to dispatch to the specific Create*Format
  // methods -- confirm.
  61. struct TFormatSwitcher;
  62. public:
  // All arguments are taken by value; the private members below have matching types.
  63. TFormatBuilder(
  64. IClientRetryPolicyPtr clientRetryPolicy,
  65. TClientContext context,
  66. TTransactionId transactionId,
  67. TOperationOptions operationOptions);
  // General entry point; shares its signature with the specific methods that follow.
  68. std::pair<TFormat, TMaybe<TSmallJobFile>> CreateFormat(
  69. const IStructuredJob& job,
  70. const EIODirection& direction,
  71. const TStructuredJobTableList& structuredTableList,
  72. const TMaybe<TFormatHints>& formatHints,
  73. ENodeReaderFormat nodeReaderFormat,
  74. bool allowFormatFromTableAttribute);
  // Variant for the "void" case (by name).
  75. std::pair<TFormat, TMaybe<TSmallJobFile>> CreateVoidFormat(
  76. const IStructuredJob& job,
  77. const EIODirection& direction,
  78. const TStructuredJobTableList& structuredTableList,
  79. const TMaybe<TFormatHints>& formatHints,
  80. ENodeReaderFormat nodeReaderFormat,
  81. bool allowFormatFromTableAttribute);
  // Variant for the Yamr case (by name).
  82. std::pair<TFormat, TMaybe<TSmallJobFile>> CreateYamrFormat(
  83. const IStructuredJob& job,
  84. const EIODirection& direction,
  85. const TStructuredJobTableList& structuredTableList,
  86. const TMaybe<TFormatHints>& formatHints,
  87. ENodeReaderFormat nodeReaderFormat,
  88. bool allowFormatFromTableAttribute);
  // Variant for the Node case (by name).
  89. std::pair<TFormat, TMaybe<TSmallJobFile>> CreateNodeFormat(
  90. const IStructuredJob& job,
  91. const EIODirection& direction,
  92. const TStructuredJobTableList& structuredTableList,
  93. const TMaybe<TFormatHints>& formatHints,
  94. ENodeReaderFormat nodeReaderFormat,
  95. bool allowFormatFromTableAttribute);
  // Variant for the Protobuf case (by name).
  96. std::pair<TFormat, TMaybe<TSmallJobFile>> CreateProtobufFormat(
  97. const IStructuredJob& job,
  98. const EIODirection& direction,
  99. const TStructuredJobTableList& structuredTableList,
  100. const TMaybe<TFormatHints>& formatHints,
  101. ENodeReaderFormat nodeReaderFormat,
  102. bool allowFormatFromTableAttribute);
  103. private:
  // State fixed at construction; all members are const.
  104. const IClientRetryPolicyPtr ClientRetryPolicy_;
  105. const TClientContext Context_;
  106. const TTransactionId TransactionId_;
  107. const TOperationOptions OperationOptions_;
  108. };
  109. ////////////////////////////////////////////////////////////////////////////////
  // Declaration only. Returns an optional table schema for the given table structure.
  // NOTE(review): which structures yield a schema and which yield Nothing is decided in the
  // implementation, not visible here -- confirm.
  110. TMaybe<TTableSchema> GetTableSchema(const TTableStructure& tableStructure);
  111. ////////////////////////////////////////////////////////////////////////////////
  112. } // namespace NDetail
  113. } // namespace NYT