config.h 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. #pragma once
  2. #include "fwd.h"
  3. #include "common.h"
  4. #include <library/cpp/yt/misc/enum.h>
  5. #include <library/cpp/yson/node/node.h>
  6. #include <util/generic/maybe.h>
  7. #include <util/generic/string.h>
  8. #include <util/generic/hash_set.h>
  9. #include <util/datetime/base.h>
  10. namespace NYT {
  11. ////////////////////////////////////////////////////////////////////////////////
  // Declarations only (extern); the definitions live in another translation unit.
  // NOTE(review): presumably the default values for TConfig::RemoteTempTablesDirectory
  // and TConfig::RemoteTempFilesDirectory - confirm against the implementation.
  12. extern const TString DefaultRemoteTempTablesDirectory;
  13. extern const TString DefaultRemoteTempFilesDirectory;
  14. ////////////////////////////////////////////////////////////////////////////////
  /// Compression codecs; this is the type of TConfig::ContentEncoding and TConfig::AcceptEncoding.
  /// NOTE(review): the quoted names in the block comments look like serialized spellings
  /// and may be consumed by tooling - keep them unchanged; confirm.
  15. enum EEncoding : int
  16. {
  17. E_IDENTITY /* "identity" */,
  18. E_GZIP /* "gzip" */,
  19. E_BROTLI /* "br" */,
  20. E_Z_LZ4 /* "z-lz4" */,
  21. };
  /// Format selection stored in TConfig::NodeReaderFormat.
  22. enum class ENodeReaderFormat : int
  23. {
  24. Yson, // Always use YSON format.
  25. Skiff, // Always use Skiff format; throw an exception if that is not possible (non-strict schema, dynamic table, etc.).
  26. Auto, // Use Skiff format if possible, YSON otherwise.
  27. };
  /// Selects which HTTP requests get dumped; see TConfig::TraceHttpRequestsMode.
  28. enum class ETraceHttpRequestsMode
  29. {
  30. // Never dump HTTP requests.
  31. Never /* "never" */,
  32. // Dump only failed HTTP requests.
  33. Error /* "error" */,
  34. // Dump all HTTP requests.
  35. Always /* "always" */,
  36. };
  /// Strategy for deduplicating concurrent uploads of the same file to the file cache;
  /// see TConfig::CacheUploadDeduplicationMode.
  37. DEFINE_ENUM(EUploadDeduplicationMode,
  38. // For each file, only one thread of one process across all hosts can upload it to the file cache at a time.
  39. // The others wait for the upload to finish and then use the already cached file.
  40. ((Global) (0))
  41. // For each file and each particular host, only one thread of one process can upload it to the file cache at a time.
  42. // The others wait for the upload to finish and then use the already cached file.
  43. ((Host) (1))
  44. // Threads of all processes upload a file to the cache concurrently (no deduplication).
  45. ((Disabled) (2))
  46. );
  47. ////////////////////////////////////////////////////////////////////////////////
  48. /// Enum describing possible versions of the table writer implementation; see TConfig::TableWriterVersion.
  49. enum class ETableWriterVersion
  50. {
  51. /// Allow the library to choose the version of the writer.
  52. Auto,
  53. /// Stable but slower version of the writer.
  54. V1,
  55. /// Unstable but faster version of the writer (going to be the default in the future).
  56. V2,
  57. };
  58. ////////////////////////////////////////////////////////////////////////////////
  /// Configuration of the client library.
  ///
  /// Derives from TThrRefBase; the static Get() accessor returns a TConfigPtr.
  /// NOTE(review): Get() presumably returns a process-wide shared instance - confirm in the implementation.
  /// NOTE(review): members without an in-class initializer are presumably assigned by the
  /// constructor / Reset(), whose definitions are not visible in this header - confirm.
  59. struct TConfig
  60. : public TThrRefBase
  61. {
  62. TString Hosts;
  63. TString Pool;
  64. TString Token;
  65. TString Prefix;
  66. TString ApiVersion;
  67. TString LogLevel;
  68. TString LogPath;
  69. ///
  70. /// For historical reasons the mapreduce client uses its own logging system.
  71. ///
  72. /// If this option is set to true, the library switches to yt/yt/core logging by default.
  73. /// But if the user calls @ref NYT::SetLogger, the library switches back to the logger provided by the user
  74. /// (except for messages from yt/yt/core).
  75. ///
  76. /// This is a temporary option. In the future it will be true by default, and then removed.
  77. ///
  78. /// https://st.yandex-team.ru/YT-23645
  79. bool LogUseCore = false;
  80. // Compression for data that is sent to YT cluster.
  81. EEncoding ContentEncoding;
  82. // Compression for data that is read from YT cluster.
  83. EEncoding AcceptEncoding;
  84. TString GlobalTxId;
  85. bool ForceIpV4;
  86. bool ForceIpV6;
  87. bool UseHosts;
  88. TDuration HostListUpdateInterval;
  89. TNode Spec;
  90. TNode TableWriter;
  91. TDuration ConnectTimeout;
  92. TDuration SocketTimeout;
  93. TDuration AddressCacheExpirationTimeout;
  94. TDuration TxTimeout;
  95. TDuration PingTimeout;
  96. TDuration PingInterval;
  97. int AsyncHttpClientThreads;
  98. int AsyncTxPingerPoolThreads;
  99. // How often we should poll for the lock state.
  100. TDuration WaitLockPollInterval;
  101. TDuration RetryInterval;
  102. TDuration ChunkErrorsRetryInterval;
  103. TDuration RateLimitExceededRetryInterval;
  104. TDuration StartOperationRetryInterval;
  105. int RetryCount;
  106. int ReadRetryCount;
  107. int StartOperationRetryCount;
  108. /// @brief Period for checking the status of a running operation.
  109. TDuration OperationTrackerPollPeriod = TDuration::Seconds(5);
  110. TString RemoteTempFilesDirectory;
  111. TString RemoteTempTablesDirectory;
  112. // @brief Keep temp tables produced by TTempTable (despite their name). Should not be used in user programs,
  113. // but may be useful to set via an environment variable for debugging purposes.
  114. bool KeepTempTables = false;
  115. //
  116. // Infer schemas for nonexistent tables from typed rows (e.g. protobuf)
  117. // when writing from an operation or a client writer.
  118. // This option can be overridden in TOperationOptions and TTableWriterOptions.
  119. bool InferTableSchema;
  120. bool UseClientProtobuf;
  121. ENodeReaderFormat NodeReaderFormat;
  122. bool ProtobufFormatWithDescriptors;
  123. int ConnectionPoolSize;
  124. /// Defines the replication factor that is used for files that are uploaded to YT
  125. /// to use them in operations.
  126. int FileCacheReplicationFactor = 10;
  127. /// @brief Used when waiting for another process which uploads the same file to the file cache.
  128. ///
  129. /// If CacheUploadDeduplicationMode is not Disabled, the current process can wait for some other
  130. /// process which is uploading the same file. This value is proportional to the waiting timeout;
  131. /// the actual timeout is computed as follows: fileSizeGb * CacheLockTimeoutPerGb.
  132. /// The default timeout assumes that the host has an upload speed of 20 Mb/s.
  133. /// If the timeout is reached, the file is uploaded by the current process without any further waiting.
  134. TDuration CacheLockTimeoutPerGb;
  135. /// @brief Used to prevent concurrent uploading of the same file to the file cache.
  136. /// NB: Each mode affects only users with the same mode enabled.
  137. EUploadDeduplicationMode CacheUploadDeduplicationMode;
  138. // @brief Minimum size in bytes for a file to undergo deduplication at upload.
  139. i64 CacheUploadDeduplicationThreshold;
  140. bool MountSandboxInTmpfs;
  141. /// @brief Set upload options for files created by the library.
  142. ///
  143. /// The path itself is always ignored, but path options (e.g. `BypassArtifactCache`) are used when uploading system files:
  144. /// cppbinary, job state, etc.
  145. TRichYPath ApiFilePathOptions;
  146. // Testing options, should never be used in user programs.
  147. bool UseAbortableResponse = false;
  148. bool EnableDebugMetrics = false;
  149. //
  150. // There is an optimization used with local YT that allows skipping the binary upload and using the real binary path.
  151. // When EnableLocalModeOptimization is set to false this optimization is completely disabled.
  152. bool EnableLocalModeOptimization = true;
  153. //
  154. // Set this to true if you want to see stderr even for jobs that did not fail.
  155. bool WriteStderrSuccessfulJobs = false;
  156. //
  157. // This configuration is useful for debugging.
  158. // If set to ETraceHttpRequestsMode::Error, the library will dump all failed http requests.
  159. // If set to ETraceHttpRequestsMode::Always, the library will dump all http requests.
  160. // All tracing occurs as DEBUG level logging.
  161. ETraceHttpRequestsMode TraceHttpRequestsMode = ETraceHttpRequestsMode::Never;
  162. TString SkynetApiHost;
  163. // Sets the SO_PRIORITY option on the socket.
  164. TMaybe<int> SocketPriority;
  165. // Framing settings
  166. // (cf. https://ytsaurus.tech/docs/en/user-guide/proxy/http-reference#framing).
  167. THashSet<TString> CommandsWithFraming;
  168. /// Which implementation of the table writer to use.
  169. ETableWriterVersion TableWriterVersion = ETableWriterVersion::Auto;
  170. /// Redirects stdout to stderr for jobs.
  171. bool RedirectStdoutToStderr = false;
  // Static helpers; only declared here.
  // NOTE(review): `var` is presumably the name of an environment variable to read - confirm in the implementation.
  172. static bool GetBool(const char* var, bool defaultValue = false);
  173. static int GetInt(const char* var, int defaultValue);
  174. static TDuration GetDuration(const char* var, TDuration defaultValue);
  175. static EEncoding GetEncoding(const char* var);
  176. static EUploadDeduplicationMode GetUploadingDeduplicationMode(
  177. const char* var,
  178. EUploadDeduplicationMode defaultValue);
  179. static void ValidateToken(const TString& token);
  180. static TString LoadTokenFromFile(const TString& tokenPath);
  181. static TNode LoadJsonSpec(const TString& strSpec);
  182. static TRichYPath LoadApiFilePathOptions(const TString& ysonMap);
  183. void LoadToken();
  184. void LoadSpec();
  185. void LoadTimings();
  186. void Reset();
  187. TConfig();
  188. static TConfigPtr Get();
  189. };
  190. ////////////////////////////////////////////////////////////////////////////////
  /// Information about the current process: host name, user name, pid, client version and binary path/name.
  ///
  /// NOTE(review): Get() returns a raw pointer; ownership and lifetime of the pointee are not
  /// visible in this header - presumably a process-wide instance that callers must not delete; confirm.
  191. struct TProcessState
  192. {
  193. TString FqdnHostName;
  194. TString UserName;
  195. int Pid;
  196. TString ClientVersion;
  197. TString BinaryPath;
  198. TString BinaryName;
  199. TProcessState();
  200. static TProcessState* Get();
  201. };
  202. ////////////////////////////////////////////////////////////////////////////////
  203. } // namespace NYT