// io-inl.h
  1. #pragma once
  2. #ifndef IO_INL_H_
  3. #error "Direct inclusion of this file is not allowed, use io.h"
  4. #endif
  5. #undef IO_INL_H_
  6. #include "finish_or_die.h"
  7. #include <util/generic/typetraits.h>
  8. #include <util/generic/yexception.h>
  9. #include <util/stream/length.h>
  10. #include <util/system/mutex.h>
  11. #include <util/system/spinlock.h>
  12. #include <library/cpp/yson/node/node_builder.h>
  13. #include <yt/cpp/mapreduce/interface/serialize.h>
  14. namespace NYT {
  15. ////////////////////////////////////////////////////////////////////////////////
  16. namespace NDetail {
  17. template<class T>
  18. struct TIsProtoOneOf
  19. : std::false_type
  20. { };
  21. template <class ...TProtoRowTypes>
  22. struct TIsProtoOneOf<TProtoOneOf<TProtoRowTypes...>>
  23. : std::true_type
  24. { };
  25. template <class T>
  26. struct TIsSkiffRowOneOf
  27. : std::false_type
  28. { };
  29. template <class ...TSkiffRowTypes>
  30. struct TIsSkiffRowOneOf<TSkiffRowOneOf<TSkiffRowTypes...>>
  31. : std::true_type
  32. { };
  33. } // namespace NDetail
  34. ////////////////////////////////////////////////////////////////////////////////
  35. template <class T, class = void>
  36. struct TRowTraits;
  37. template <>
  38. struct TRowTraits<TNode>
  39. {
  40. using TRowType = TNode;
  41. using IReaderImpl = INodeReaderImpl;
  42. using IWriterImpl = INodeWriterImpl;
  43. };
  44. template <>
  45. struct TRowTraits<TYaMRRow>
  46. {
  47. using TRowType = TYaMRRow;
  48. using IReaderImpl = IYaMRReaderImpl;
  49. using IWriterImpl = IYaMRWriterImpl;
  50. };
  51. template <>
  52. struct TRowTraits<Message>
  53. {
  54. using TRowType = Message;
  55. using IReaderImpl = IProtoReaderImpl;
  56. using IWriterImpl = IProtoWriterImpl;
  57. };
  58. template <class T>
  59. struct TRowTraits<T, std::enable_if_t<TIsBaseOf<Message, T>::Value>>
  60. {
  61. using TRowType = T;
  62. using IReaderImpl = IProtoReaderImpl;
  63. using IWriterImpl = IProtoWriterImpl;
  64. };
  65. template <class T>
  66. struct TRowTraits<T, std::enable_if_t<TIsSkiffRow<T>::value>>
  67. {
  68. using TRowType = T;
  69. using IReaderImpl = ISkiffRowReaderImpl;
  70. };
  71. template <class... TSkiffRowTypes>
  72. struct TRowTraits<TSkiffRowOneOf<TSkiffRowTypes...>>
  73. {
  74. using TRowType = TSkiffRowOneOf<TSkiffRowTypes...>;
  75. using IReaderImpl = ISkiffRowReaderImpl;
  76. };
  77. template <class... TProtoRowTypes>
  78. struct TRowTraits<TProtoOneOf<TProtoRowTypes...>>
  79. {
  80. using TRowType = TProtoOneOf<TProtoRowTypes...>;
  81. using IReaderImpl = IProtoReaderImpl;
  82. using IWriterImpl = IProtoWriterImpl;
  83. };
  84. ////////////////////////////////////////////////////////////////////////////////
  85. struct IReaderImplBase
  86. : public TThrRefBase
  87. {
  88. virtual bool IsValid() const = 0;
  89. virtual void Next() = 0;
  90. virtual ui32 GetTableIndex() const = 0;
  91. virtual ui32 GetRangeIndex() const = 0;
  92. virtual ui64 GetRowIndex() const = 0;
  93. virtual void NextKey() = 0;
  94. // Not pure virtual because of clients that has already implemented this interface.
  95. virtual TMaybe<size_t> GetReadByteCount() const;
  96. virtual i64 GetTabletIndex() const;
  97. virtual bool IsEndOfStream() const;
  98. virtual bool IsRawReaderExhausted() const;
  99. };
  100. struct INodeReaderImpl
  101. : public IReaderImplBase
  102. {
  103. virtual const TNode& GetRow() const = 0;
  104. virtual void MoveRow(TNode* row) = 0;
  105. };
  106. struct IYaMRReaderImpl
  107. : public IReaderImplBase
  108. {
  109. virtual const TYaMRRow& GetRow() const = 0;
  110. virtual void MoveRow(TYaMRRow* row)
  111. {
  112. *row = GetRow();
  113. }
  114. };
  115. struct IProtoReaderImpl
  116. : public IReaderImplBase
  117. {
  118. virtual void ReadRow(Message* row) = 0;
  119. };
  120. struct ISkiffRowReaderImpl
  121. : public IReaderImplBase
  122. {
  123. virtual void ReadRow(const ISkiffRowParserPtr& parser) = 0;
  124. };
  125. ////////////////////////////////////////////////////////////////////////////////
  126. namespace NDetail {
  127. ////////////////////////////////////////////////////////////////////////////////
  128. // We don't include <yt/cpp/mapreduce/interface/logging/yt_log.h> in this file
  129. // to avoid macro name clashes (specifically YT_LOG_DEBUG)
  130. void LogTableReaderStatistics(ui64 rowCount, TMaybe<size_t> byteCount);
  131. template <class T>
  132. class TTableReaderBase
  133. : public TThrRefBase
  134. {
  135. public:
  136. using TRowType = typename TRowTraits<T>::TRowType;
  137. using IReaderImpl = typename TRowTraits<T>::IReaderImpl;
  138. explicit TTableReaderBase(::TIntrusivePtr<IReaderImpl> reader)
  139. : Reader_(reader)
  140. { }
  141. ~TTableReaderBase() override
  142. {
  143. NDetail::LogTableReaderStatistics(ReadRowCount_, Reader_->GetReadByteCount());
  144. }
  145. bool IsValid() const
  146. {
  147. return Reader_->IsValid();
  148. }
  149. void Next()
  150. {
  151. Reader_->Next();
  152. ++ReadRowCount_;
  153. RowState_ = ERowState::None;
  154. }
  155. bool IsEndOfStream()
  156. {
  157. return Reader_->IsEndOfStream();
  158. }
  159. bool IsRawReaderExhausted()
  160. {
  161. return Reader_->IsRawReaderExhausted();
  162. }
  163. ui32 GetTableIndex() const
  164. {
  165. return Reader_->GetTableIndex();
  166. }
  167. ui32 GetRangeIndex() const
  168. {
  169. return Reader_->GetRangeIndex();
  170. }
  171. ui64 GetRowIndex() const
  172. {
  173. return Reader_->GetRowIndex();
  174. }
  175. i64 GetTabletIndex() const
  176. {
  177. return Reader_->GetTabletIndex();
  178. }
  179. protected:
  180. template <typename TCacher, typename TCacheGetter>
  181. const auto& DoGetRowCached(TCacher cacher, TCacheGetter cacheGetter) const
  182. {
  183. switch (RowState_) {
  184. case ERowState::None:
  185. cacher();
  186. RowState_ = ERowState::Cached;
  187. break;
  188. case ERowState::Cached:
  189. break;
  190. case ERowState::MovedOut:
  191. ythrow yexception() << "Row is already moved";
  192. }
  193. return *cacheGetter();
  194. }
  195. template <typename U, typename TMover, typename TCacheMover>
  196. void DoMoveRowCached(U* result, TMover mover, TCacheMover cacheMover)
  197. {
  198. Y_ABORT_UNLESS(result);
  199. switch (RowState_) {
  200. case ERowState::None:
  201. mover(result);
  202. break;
  203. case ERowState::Cached:
  204. cacheMover(result);
  205. break;
  206. case ERowState::MovedOut:
  207. ythrow yexception() << "Row is already moved";
  208. }
  209. RowState_ = ERowState::MovedOut;
  210. }
  211. private:
  212. enum class ERowState
  213. {
  214. None,
  215. Cached,
  216. MovedOut,
  217. };
  218. protected:
  219. ::TIntrusivePtr<IReaderImpl> Reader_;
  220. private:
  221. ui64 ReadRowCount_ = 0;
  222. mutable ERowState RowState_ = ERowState::None;
  223. };
  224. template <class T>
  225. class TSimpleTableReader
  226. : public TTableReaderBase<T>
  227. {
  228. public:
  229. using TBase = TTableReaderBase<T>;
  230. using typename TBase::TRowType;
  231. using TBase::TBase;
  232. const TRowType& GetRow() const
  233. {
  234. // Caching is implemented in underlying reader.
  235. return TBase::DoGetRowCached(
  236. /* cacher */ [&] {},
  237. /* cacheGetter */ [&] {
  238. return &Reader_->GetRow();
  239. });
  240. }
  241. void MoveRow(TRowType* result)
  242. {
  243. // Caching is implemented in underlying reader.
  244. TBase::DoMoveRowCached(
  245. result,
  246. /* mover */ [&] (TRowType* result) {
  247. Reader_->MoveRow(result);
  248. },
  249. /* cacheMover */ [&] (TRowType* result) {
  250. Reader_->MoveRow(result);
  251. });
  252. }
  253. TRowType MoveRow()
  254. {
  255. TRowType result;
  256. MoveRow(&result);
  257. return result;
  258. }
  259. private:
  260. using TBase::Reader_;
  261. };
  262. ////////////////////////////////////////////////////////////////////////////////
  263. } // namespace NDetail
  264. template <>
  265. class TTableReader<TNode>
  266. : public NDetail::TSimpleTableReader<TNode>
  267. {
  268. using TSimpleTableReader<TNode>::TSimpleTableReader;
  269. };
  270. template <>
  271. class TTableReader<TYaMRRow>
  272. : public NDetail::TSimpleTableReader<TYaMRRow>
  273. {
  274. using TSimpleTableReader<TYaMRRow>::TSimpleTableReader;
  275. };
  276. template <>
  277. class TTableReader<Message>
  278. : public NDetail::TTableReaderBase<Message>
  279. {
  280. public:
  281. using TBase = NDetail::TTableReaderBase<Message>;
  282. using TBase::TBase;
  283. template <class U>
  284. const U& GetRow() const
  285. {
  286. static_assert(TIsBaseOf<Message, U>::Value);
  287. return TBase::DoGetRowCached(
  288. /* cacher */ [&] {
  289. CachedRow_.reset(new U);
  290. Reader_->ReadRow(CachedRow_.get());
  291. },
  292. /* cacheGetter */ [&] {
  293. auto result = dynamic_cast<const U*>(CachedRow_.get());
  294. Y_ABORT_UNLESS(result);
  295. return result;
  296. });
  297. }
  298. template <class U>
  299. void MoveRow(U* result)
  300. {
  301. static_assert(TIsBaseOf<Message, U>::Value);
  302. TBase::DoMoveRowCached(
  303. result,
  304. /* mover */ [&] (U* result) {
  305. Reader_->ReadRow(result);
  306. },
  307. /* cacheMover */ [&] (U* result) {
  308. auto cast = dynamic_cast<U*>(CachedRow_.get());
  309. Y_ABORT_UNLESS(cast);
  310. result->Swap(cast);
  311. });
  312. }
  313. template <class U>
  314. U MoveRow()
  315. {
  316. static_assert(TIsBaseOf<Message, U>::Value);
  317. U result;
  318. MoveRow(&result);
  319. return result;
  320. }
  321. ::TIntrusivePtr<IProtoReaderImpl> GetReaderImpl() const
  322. {
  323. return Reader_;
  324. }
  325. private:
  326. using TBase::Reader_;
  327. mutable std::unique_ptr<Message> CachedRow_;
  328. };
  329. template<class... TProtoRowTypes>
  330. class TTableReader<TProtoOneOf<TProtoRowTypes...>>
  331. : public NDetail::TTableReaderBase<TProtoOneOf<TProtoRowTypes...>>
  332. {
  333. public:
  334. using TBase = NDetail::TTableReaderBase<TProtoOneOf<TProtoRowTypes...>>;
  335. using TBase::TBase;
  336. template <class U>
  337. const U& GetRow() const
  338. {
  339. AssertIsOneOf<U>();
  340. return TBase::DoGetRowCached(
  341. /* cacher */ [&] {
  342. Reader_->ReadRow(&std::get<U>(CachedRows_));
  343. CachedIndex_ = NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value;
  344. },
  345. /* cacheGetter */ [&] {
  346. return &std::get<U>(CachedRows_);
  347. });
  348. }
  349. template <class U>
  350. void MoveRow(U* result)
  351. {
  352. AssertIsOneOf<U>();
  353. return TBase::DoMoveRowCached(
  354. result,
  355. /* mover */ [&] (U* result) {
  356. Reader_->ReadRow(result);
  357. },
  358. /* cacheMover */ [&] (U* result) {
  359. Y_ABORT_UNLESS((NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value) == CachedIndex_);
  360. *result = std::move(std::get<U>(CachedRows_));
  361. });
  362. }
  363. template <class U>
  364. U MoveRow()
  365. {
  366. U result;
  367. MoveRow(&result);
  368. return result;
  369. }
  370. ::TIntrusivePtr<IProtoReaderImpl> GetReaderImpl() const
  371. {
  372. return Reader_;
  373. }
  374. private:
  375. using TBase::Reader_;
  376. // std::variant could also be used here, but std::tuple leads to better performance
  377. // because of deallocations that std::variant has to do
  378. mutable std::tuple<TProtoRowTypes...> CachedRows_;
  379. mutable int CachedIndex_;
  380. template <class U>
  381. static constexpr void AssertIsOneOf()
  382. {
  383. static_assert(
  384. (std::is_same<U, TProtoRowTypes>::value || ...),
  385. "Template parameter must be one of TProtoOneOf template parameter");
  386. }
  387. };
  388. template <class T>
  389. class TTableReader<T, std::enable_if_t<TIsBaseOf<Message, T>::Value>>
  390. : public TTableReader<TProtoOneOf<T>>
  391. {
  392. public:
  393. using TRowType = T;
  394. using TBase = TTableReader<TProtoOneOf<T>>;
  395. using TBase::TBase;
  396. const T& GetRow() const
  397. {
  398. return TBase::template GetRow<T>();
  399. }
  400. void MoveRow(T* result)
  401. {
  402. TBase::template MoveRow<T>(result);
  403. }
  404. T MoveRow()
  405. {
  406. return TBase::template MoveRow<T>();
  407. }
  408. };
  409. template<class... TSkiffRowTypes>
  410. class TTableReader<TSkiffRowOneOf<TSkiffRowTypes...>>
  411. : public NDetail::TTableReaderBase<TSkiffRowOneOf<TSkiffRowTypes...>>
  412. {
  413. public:
  414. using TBase = NDetail::TTableReaderBase<TSkiffRowOneOf<TSkiffRowTypes...>>;
  415. using TBase::TBase;
  416. explicit TTableReader(::TIntrusivePtr<typename TBase::IReaderImpl> reader, const TMaybe<TSkiffRowHints>& hints)
  417. : TBase(reader)
  418. , Parsers_({(CreateSkiffParser<TSkiffRowTypes>(&std::get<TSkiffRowTypes>(CachedRows_), hints))...})
  419. { }
  420. template <class U>
  421. const U& GetRow() const
  422. {
  423. AssertIsOneOf<U>();
  424. return TBase::DoGetRowCached(
  425. /* cacher */ [&] {
  426. auto index = NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value;
  427. Reader_->ReadRow(Parsers_[index]);
  428. CachedIndex_ = index;
  429. },
  430. /* cacheGetter */ [&] {
  431. return &std::get<U>(CachedRows_);
  432. });
  433. }
  434. template <class U>
  435. void MoveRow(U* result)
  436. {
  437. AssertIsOneOf<U>();
  438. return TBase::DoMoveRowCached(
  439. result,
  440. /* mover */ [&] (U* result) {
  441. auto index = NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value;
  442. Reader_->ReadRow(Parsers_[index]);
  443. *result = std::move(std::get<U>(CachedRows_));
  444. },
  445. /* cacheMover */ [&] (U* result) {
  446. Y_ABORT_UNLESS((NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value) == CachedIndex_);
  447. *result = std::move(std::get<U>(CachedRows_));
  448. });
  449. }
  450. template <class U>
  451. U MoveRow()
  452. {
  453. U result;
  454. MoveRow(&result);
  455. return result;
  456. }
  457. ::TIntrusivePtr<ISkiffRowReaderImpl> GetReaderImpl() const
  458. {
  459. return Reader_;
  460. }
  461. private:
  462. using TBase::Reader_;
  463. // std::variant could also be used here, but std::tuple leads to better performance
  464. // because of deallocations that std::variant has to do
  465. mutable std::tuple<TSkiffRowTypes...> CachedRows_;
  466. mutable std::vector<ISkiffRowParserPtr> Parsers_;
  467. mutable int CachedIndex_;
  468. template <class U>
  469. static constexpr void AssertIsOneOf()
  470. {
  471. static_assert(
  472. (std::is_same<U, TSkiffRowTypes>::value || ...),
  473. "Template parameter must be one of TSkiffRowOneOf template parameter");
  474. }
  475. };
  476. template <class T>
  477. class TTableReader<T, std::enable_if_t<TIsSkiffRow<T>::value>>
  478. : public TTableReader<TSkiffRowOneOf<T>>
  479. {
  480. public:
  481. using TRowType = T;
  482. using TBase = TTableReader<TSkiffRowOneOf<T>>;
  483. using TBase::TBase;
  484. const T& GetRow()
  485. {
  486. return TBase::template GetRow<T>();
  487. }
  488. void MoveRow(T* result)
  489. {
  490. TBase::template MoveRow<T>(result);
  491. }
  492. T MoveRow()
  493. {
  494. return TBase::template MoveRow<T>();
  495. }
  496. };
  497. template <>
  498. inline TTableReaderPtr<TNode> IIOClient::CreateTableReader<TNode>(
  499. const TRichYPath& path, const TTableReaderOptions& options)
  500. {
  501. return new TTableReader<TNode>(CreateNodeReader(path, options));
  502. }
  503. template <>
  504. inline TTableReaderPtr<TYaMRRow> IIOClient::CreateTableReader<TYaMRRow>(
  505. const TRichYPath& path, const TTableReaderOptions& options)
  506. {
  507. return new TTableReader<TYaMRRow>(CreateYaMRReader(path, options));
  508. }
  509. template <class T, class = std::enable_if_t<TIsBaseOf<Message, T>::Value>>
  510. struct TReaderCreator
  511. {
  512. static TTableReaderPtr<T> Create(::TIntrusivePtr<IProtoReaderImpl> reader)
  513. {
  514. return new TTableReader<T>(reader);
  515. }
  516. };
  517. template <class T>
  518. inline TTableReaderPtr<T> IIOClient::CreateTableReader(
  519. const TRichYPath& path, const TTableReaderOptions& options)
  520. {
  521. if constexpr (TIsBaseOf<Message, T>::Value) {
  522. TAutoPtr<T> prototype(new T);
  523. return new TTableReader<T>(CreateProtoReader(path, options, prototype.Get()));
  524. } else if constexpr (TIsSkiffRow<T>::value) {
  525. const auto& hints = options.FormatHints_ ? options.FormatHints_->SkiffRowHints_ : Nothing();
  526. auto schema = GetSkiffSchema<T>(hints);
  527. auto skipper = CreateSkiffSkipper<T>(hints);
  528. return new TTableReader<T>(CreateSkiffRowReader(path, options, skipper, schema), hints);
  529. } else {
  530. static_assert(TDependentFalse<T>, "Unsupported type for table reader");
  531. }
  532. }
  533. ////////////////////////////////////////////////////////////////////////////////
  534. template <typename T>
  535. TTableReaderPtr<T> CreateTableReader(
  536. IInputStream* stream,
  537. const TTableReaderOptions& options)
  538. {
  539. return TReaderCreator<T>::Create(NDetail::CreateProtoReader(stream, options, T::descriptor()));
  540. }
  541. template <class... Ts>
  542. TTableReaderPtr<typename NDetail::TProtoOneOfUnique<Ts...>::TType> CreateProtoMultiTableReader(
  543. IInputStream* stream,
  544. const TTableReaderOptions& options)
  545. {
  546. return new TTableReader<typename NDetail::TProtoOneOfUnique<Ts...>::TType>(
  547. NDetail::CreateProtoReader(stream, options, {Ts::descriptor()...}));
  548. }
  549. template <class T>
  550. TTableReaderPtr<T> CreateProtoMultiTableReader(
  551. IInputStream* stream,
  552. int tableCount,
  553. const TTableReaderOptions& options)
  554. {
  555. static_assert(TIsBaseOf<::google::protobuf::Message, T>::Value);
  556. TVector<const ::google::protobuf::Descriptor*> descriptors(tableCount, T::descriptor());
  557. return new TTableReader<T>(NDetail::CreateProtoReader(stream, options, std::move(descriptors)));
  558. }
  559. ////////////////////////////////////////////////////////////////////////////////
  560. template <class T>
  561. class TTableRangesReader<T>
  562. : public TThrRefBase
  563. {
  564. public:
  565. using TRowType = T;
  566. private:
  567. using TReaderImpl = typename TRowTraits<TRowType>::IReaderImpl;
  568. public:
  569. TTableRangesReader(::TIntrusivePtr<TReaderImpl> readerImpl)
  570. : ReaderImpl_(readerImpl)
  571. , Reader_(MakeIntrusive<TTableReader<TRowType>>(readerImpl))
  572. , IsValid_(Reader_->IsValid())
  573. { }
  574. TTableReader<T>& GetRange()
  575. {
  576. return *Reader_;
  577. }
  578. bool IsValid() const
  579. {
  580. return IsValid_;
  581. }
  582. void Next()
  583. {
  584. ReaderImpl_->NextKey();
  585. if ((IsValid_ = Reader_->IsValid())) {
  586. Reader_->Next();
  587. }
  588. }
  589. private:
  590. ::TIntrusivePtr<TReaderImpl> ReaderImpl_;
  591. ::TIntrusivePtr<TTableReader<TRowType>> Reader_;
  592. bool IsValid_;
  593. };
  594. ////////////////////////////////////////////////////////////////////////////////
  595. template <typename T>
  596. struct IWriterImplBase
  597. : public TThrRefBase
  598. {
  599. virtual void AddRow(const T& row, size_t tableIndex) = 0;
  600. virtual void AddRow(const T& row, size_t tableIndex, size_t /*rowWeight*/)
  601. {
  602. AddRow(row, tableIndex);
  603. }
  604. virtual void AddRow(T&& row, size_t tableIndex) = 0;
  605. virtual void AddRow(T&& row, size_t tableIndex, size_t /*rowWeight*/)
  606. {
  607. AddRow(std::move(row), tableIndex);
  608. }
  609. virtual void AddRowBatch(const TVector<T>& rowBatch, size_t tableIndex, size_t rowBatchWeight = 0)
  610. {
  611. for (const auto& row : rowBatch) {
  612. AddRow(row, tableIndex, rowBatchWeight / rowBatch.size());
  613. }
  614. }
  615. virtual void AddRowBatch(TVector<T>&& rowBatch, size_t tableIndex, size_t rowBatchWeight = 0)
  616. {
  617. auto rowBatchSize = rowBatch.size();
  618. for (auto&& row : std::move(rowBatch)) {
  619. AddRow(std::move(row), tableIndex, rowBatchWeight / rowBatchSize);
  620. }
  621. }
  622. virtual size_t GetBufferMemoryUsage() const
  623. {
  624. return 0;
  625. }
  626. virtual size_t GetTableCount() const = 0;
  627. virtual void FinishTable(size_t tableIndex) = 0;
  628. virtual void Abort()
  629. { }
  630. };
  631. struct INodeWriterImpl
  632. : public IWriterImplBase<TNode>
  633. {
  634. };
  635. struct IYaMRWriterImpl
  636. : public IWriterImplBase<TYaMRRow>
  637. {
  638. };
  639. struct IProtoWriterImpl
  640. : public IWriterImplBase<Message>
  641. {
  642. };
  643. ////////////////////////////////////////////////////////////////////////////////
  644. template <class T>
  645. class TTableWriterBase
  646. : public TThrRefBase
  647. {
  648. public:
  649. using TRowType = T;
  650. using IWriterImpl = typename TRowTraits<T>::IWriterImpl;
  651. explicit TTableWriterBase(::TIntrusivePtr<IWriterImpl> writer)
  652. : Writer_(writer)
  653. , Locks_(MakeAtomicShared<TVector<TAdaptiveLock>>(writer->GetTableCount()))
  654. { }
  655. void Abort()
  656. {
  657. Writer_->Abort();
  658. }
  659. void AddRow(const T& row, size_t tableIndex = 0, size_t rowWeight = 0)
  660. {
  661. DoAddRow<T>(row, tableIndex, rowWeight);
  662. }
  663. void AddRow(T&& row, size_t tableIndex = 0, size_t rowWeight = 0)
  664. {
  665. DoAddRow<T>(std::move(row), tableIndex, rowWeight);
  666. }
  667. void AddRowBatch(const TVector<T>& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0)
  668. {
  669. DoAddRowBatch<T>(rowBatch, tableIndex, rowBatchWeight);
  670. }
  671. void AddRowBatch(TVector<T>&& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0)
  672. {
  673. DoAddRowBatch<T>(std::move(rowBatch), tableIndex, rowBatchWeight);
  674. }
  675. void Finish()
  676. {
  677. for (size_t i = 0; i < Locks_->size(); ++i) {
  678. auto guard = Guard((*Locks_)[i]);
  679. Writer_->FinishTable(i);
  680. }
  681. }
  682. size_t GetBufferMemoryUsage() const
  683. {
  684. return DoGetBufferMemoryUsage();
  685. }
  686. protected:
  687. template <class U>
  688. void DoAddRow(const U& row, size_t tableIndex = 0, size_t rowWeight = 0)
  689. {
  690. if (tableIndex >= Locks_->size()) {
  691. ythrow TIOException() <<
  692. "Table index " << tableIndex <<
  693. " is out of range [0, " << Locks_->size() << ")";
  694. }
  695. auto guard = Guard((*Locks_)[tableIndex]);
  696. Writer_->AddRow(row, tableIndex, rowWeight);
  697. }
  698. template <class U>
  699. void DoAddRow(U&& row, size_t tableIndex = 0, size_t rowWeight = 0)
  700. {
  701. if (tableIndex >= Locks_->size()) {
  702. ythrow TIOException() <<
  703. "Table index " << tableIndex <<
  704. " is out of range [0, " << Locks_->size() << ")";
  705. }
  706. auto guard = Guard((*Locks_)[tableIndex]);
  707. Writer_->AddRow(std::move(row), tableIndex, rowWeight);
  708. }
  709. template <class U>
  710. void DoAddRowBatch(const TVector<U>& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0)
  711. {
  712. if (tableIndex >= Locks_->size()) {
  713. ythrow TIOException() <<
  714. "Table index " << tableIndex <<
  715. " is out of range [0, " << Locks_->size() << ")";
  716. }
  717. auto guard = Guard((*Locks_)[tableIndex]);
  718. Writer_->AddRowBatch(rowBatch, tableIndex, rowBatchWeight);
  719. }
  720. template <class U>
  721. void DoAddRowBatch(TVector<U>&& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0)
  722. {
  723. if (tableIndex >= Locks_->size()) {
  724. ythrow TIOException() <<
  725. "Table index " << tableIndex <<
  726. " is out of range [0, " << Locks_->size() << ")";
  727. }
  728. auto guard = Guard((*Locks_)[tableIndex]);
  729. Writer_->AddRowBatch(std::move(rowBatch), tableIndex, rowBatchWeight);
  730. }
  731. size_t DoGetBufferMemoryUsage() const
  732. {
  733. return Writer_->GetBufferMemoryUsage();
  734. }
  735. ::TIntrusivePtr<IWriterImpl> GetWriterImpl()
  736. {
  737. return Writer_;
  738. }
  739. private:
  740. ::TIntrusivePtr<IWriterImpl> Writer_;
  741. TAtomicSharedPtr<TVector<TAdaptiveLock>> Locks_;
  742. };
  743. template <>
  744. class TTableWriter<TNode>
  745. : public TTableWriterBase<TNode>
  746. {
  747. public:
  748. using TBase = TTableWriterBase<TNode>;
  749. explicit TTableWriter(::TIntrusivePtr<IWriterImpl> writer)
  750. : TBase(writer)
  751. { }
  752. };
  753. template <>
  754. class TTableWriter<TYaMRRow>
  755. : public TTableWriterBase<TYaMRRow>
  756. {
  757. public:
  758. using TBase = TTableWriterBase<TYaMRRow>;
  759. explicit TTableWriter(::TIntrusivePtr<IWriterImpl> writer)
  760. : TBase(writer)
  761. { }
  762. };
  763. template <>
  764. class TTableWriter<Message>
  765. : public TTableWriterBase<Message>
  766. {
  767. public:
  768. using TBase = TTableWriterBase<Message>;
  769. explicit TTableWriter(::TIntrusivePtr<IWriterImpl> writer)
  770. : TBase(writer)
  771. { }
  772. template <class U, std::enable_if_t<std::is_base_of<Message, U>::value>* = nullptr>
  773. void AddRow(const U& row, size_t tableIndex = 0, size_t rowWeight = 0)
  774. {
  775. TBase::AddRow(row, tableIndex, rowWeight);
  776. }
  777. template <class U, std::enable_if_t<std::is_base_of<Message, U>::value>* = nullptr>
  778. void AddRowBatch(const TVector<U>& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0)
  779. {
  780. for (const auto& row : rowBatch) {
  781. AddRow(row, tableIndex, rowBatchWeight / rowBatch.size());
  782. }
  783. }
  784. };
  785. template <class T>
  786. class TTableWriter<T, std::enable_if_t<TIsBaseOf<Message, T>::Value>>
  787. : public TTableWriter<Message>
  788. {
  789. public:
  790. using TRowType = T;
  791. using TBase = TTableWriter<Message>;
  792. explicit TTableWriter(::TIntrusivePtr<IWriterImpl> writer)
  793. : TBase(writer)
  794. { }
  795. void AddRow(const T& row, size_t tableIndex = 0, size_t rowWeight = 0)
  796. {
  797. TBase::AddRow<T>(row, tableIndex, rowWeight);
  798. }
  799. void AddRowBatch(const TVector<T>& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0)
  800. {
  801. TBase::AddRowBatch<T>(rowBatch, tableIndex, rowBatchWeight);
  802. }
  803. };
  804. template <>
  805. inline TTableWriterPtr<TNode> IIOClient::CreateTableWriter<TNode>(
  806. const TRichYPath& path, const TTableWriterOptions& options)
  807. {
  808. return new TTableWriter<TNode>(CreateNodeWriter(path, options));
  809. }
  810. template <>
  811. inline TTableWriterPtr<TYaMRRow> IIOClient::CreateTableWriter<TYaMRRow>(
  812. const TRichYPath& path, const TTableWriterOptions& options)
  813. {
  814. return new TTableWriter<TYaMRRow>(CreateYaMRWriter(path, options));
  815. }
  816. template <class T>
  817. inline TTableWriterPtr<T> IIOClient::CreateTableWriter(
  818. const TRichYPath& path, const TTableWriterOptions& options)
  819. {
  820. if constexpr (TIsBaseOf<Message, T>::Value) {
  821. TAutoPtr<T> prototype(new T);
  822. return new TTableWriter<T>(CreateProtoWriter(path, options, prototype.Get()));
  823. } else {
  824. static_assert(TDependentFalse<T>, "Unsupported type for table writer");
  825. }
  826. }
  827. ////////////////////////////////////////////////////////////////////////////////
  828. template <typename T>
  829. TTableReaderPtr<T> CreateConcreteProtobufReader(TTableReader<Message>* reader)
  830. {
  831. static_assert(std::is_base_of_v<Message, T>, "T must be a protobuf type (either Message or its descendant)");
  832. Y_ENSURE(reader, "reader must be non-null");
  833. return ::MakeIntrusive<TTableReader<T>>(reader->GetReaderImpl());
  834. }
  835. template <typename T>
  836. TTableReaderPtr<T> CreateConcreteProtobufReader(const TTableReaderPtr<Message>& reader)
  837. {
  838. Y_ENSURE(reader, "reader must be non-null");
  839. return CreateConcreteProtobufReader<T>(reader.Get());
  840. }
  841. template <typename T>
  842. TTableReaderPtr<Message> CreateGenericProtobufReader(TTableReader<T>* reader)
  843. {
  844. static_assert(std::is_base_of_v<Message, T>, "T must be a protobuf type (either Message or its descendant)");
  845. Y_ENSURE(reader, "reader must be non-null");
  846. return ::MakeIntrusive<TTableReader<Message>>(reader->GetReaderImpl());
  847. }
  848. template <typename T>
  849. TTableReaderPtr<Message> CreateGenericProtobufReader(const TTableReaderPtr<T>& reader)
  850. {
  851. Y_ENSURE(reader, "reader must be non-null");
  852. return CreateGenericProtobufReader(reader.Get());
  853. }
  854. ////////////////////////////////////////////////////////////////////////////////
  855. } // namespace NYT