io-inl.h 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030
  1. #pragma once
  2. #ifndef IO_INL_H_
  3. #error "Direct inclusion of this file is not allowed, use io.h"
  4. #endif
  5. #undef IO_INL_H_
  6. #include "finish_or_die.h"
  7. #include <util/generic/typetraits.h>
  8. #include <util/generic/yexception.h>
  9. #include <util/stream/length.h>
  10. #include <util/system/mutex.h>
  11. #include <util/system/spinlock.h>
  12. #include <library/cpp/yson/node/node_builder.h>
  13. #include <yt/cpp/mapreduce/interface/serialize.h>
  14. namespace NYT {
  15. ////////////////////////////////////////////////////////////////////////////////
  16. namespace NDetail {
  17. template<class T>
  18. struct TIsProtoOneOf
  19. : std::false_type
  20. { };
  21. template <class ...TProtoRowTypes>
  22. struct TIsProtoOneOf<TProtoOneOf<TProtoRowTypes...>>
  23. : std::true_type
  24. { };
  25. template <class T>
  26. struct TIsSkiffRowOneOf
  27. : std::false_type
  28. { };
  29. template <class ...TSkiffRowTypes>
  30. struct TIsSkiffRowOneOf<TSkiffRowOneOf<TSkiffRowTypes...>>
  31. : std::true_type
  32. { };
  33. } // namespace NDetail
  34. ////////////////////////////////////////////////////////////////////////////////
  // Maps a row type to its row type alias and reader/writer implementation interfaces.
  // Only specializations are defined; the second parameter exists so that partial
  // specializations can be selected with std::enable_if_t.
  template <class T, class TEnable = void>
  struct TRowTraits;
  37. template <>
  38. struct TRowTraits<TNode>
  39. {
  40. using TRowType = TNode;
  41. using IReaderImpl = INodeReaderImpl;
  42. using IWriterImpl = INodeWriterImpl;
  43. };
  44. template <>
  45. struct TRowTraits<TYaMRRow>
  46. {
  47. using TRowType = TYaMRRow;
  48. using IReaderImpl = IYaMRReaderImpl;
  49. using IWriterImpl = IYaMRWriterImpl;
  50. };
  51. template <>
  52. struct TRowTraits<Message>
  53. {
  54. using TRowType = Message;
  55. using IReaderImpl = IProtoReaderImpl;
  56. using IWriterImpl = IProtoWriterImpl;
  57. };
  58. template <class T>
  59. struct TRowTraits<T, std::enable_if_t<TIsBaseOf<Message, T>::Value>>
  60. {
  61. using TRowType = T;
  62. using IReaderImpl = IProtoReaderImpl;
  63. using IWriterImpl = IProtoWriterImpl;
  64. };
  65. template <class T>
  66. struct TRowTraits<T, std::enable_if_t<TIsSkiffRow<T>::value>>
  67. {
  68. using TRowType = T;
  69. using IReaderImpl = ISkiffRowReaderImpl;
  70. };
  71. template <class... TSkiffRowTypes>
  72. struct TRowTraits<TSkiffRowOneOf<TSkiffRowTypes...>>
  73. {
  74. using TRowType = TSkiffRowOneOf<TSkiffRowTypes...>;
  75. using IReaderImpl = ISkiffRowReaderImpl;
  76. };
  77. template <class... TProtoRowTypes>
  78. struct TRowTraits<TProtoOneOf<TProtoRowTypes...>>
  79. {
  80. using TRowType = TProtoOneOf<TProtoRowTypes...>;
  81. using IReaderImpl = IProtoReaderImpl;
  82. using IWriterImpl = IProtoWriterImpl;
  83. };
  84. ////////////////////////////////////////////////////////////////////////////////
  85. struct IReaderImplBase
  86. : public TThrRefBase
  87. {
  88. virtual bool IsValid() const = 0;
  89. virtual void Next() = 0;
  90. virtual ui32 GetTableIndex() const = 0;
  91. virtual ui32 GetRangeIndex() const = 0;
  92. virtual ui64 GetRowIndex() const = 0;
  93. virtual void NextKey() = 0;
  94. // Not pure virtual because of clients that has already implemented this interface.
  95. virtual TMaybe<size_t> GetReadByteCount() const;
  96. virtual i64 GetTabletIndex() const;
  97. virtual bool IsEndOfStream() const;
  98. virtual bool IsRawReaderExhausted() const;
  99. };
  100. struct INodeReaderImpl
  101. : public IReaderImplBase
  102. {
  103. virtual const TNode& GetRow() const = 0;
  104. virtual void MoveRow(TNode* row) = 0;
  105. };
  106. struct IYaMRReaderImpl
  107. : public IReaderImplBase
  108. {
  109. virtual const TYaMRRow& GetRow() const = 0;
  110. virtual void MoveRow(TYaMRRow* row)
  111. {
  112. *row = GetRow();
  113. }
  114. };
  115. struct IProtoReaderImpl
  116. : public IReaderImplBase
  117. {
  118. virtual void ReadRow(Message* row) = 0;
  119. };
  120. struct ISkiffRowReaderImpl
  121. : public IReaderImplBase
  122. {
  123. virtual void ReadRow(const ISkiffRowParserPtr& parser) = 0;
  124. };
  125. ////////////////////////////////////////////////////////////////////////////////
  126. namespace NDetail {
  127. ////////////////////////////////////////////////////////////////////////////////
  128. // We don't include <yt/cpp/mapreduce/interface/logging/yt_log.h> in this file
  129. // to avoid macro name clashes (specifically YT_LOG_DEBUG)
  130. void LogTableReaderStatistics(ui64 rowCount, TMaybe<size_t> byteCount);
  131. template <class T>
  132. class TTableReaderBase
  133. : public TThrRefBase
  134. {
  135. public:
  136. using TRowType = typename TRowTraits<T>::TRowType;
  137. using IReaderImpl = typename TRowTraits<T>::IReaderImpl;
  138. explicit TTableReaderBase(::TIntrusivePtr<IReaderImpl> reader)
  139. : Reader_(reader)
  140. { }
  141. ~TTableReaderBase() override
  142. {
  143. NDetail::LogTableReaderStatistics(ReadRowCount_, Reader_->GetReadByteCount());
  144. }
  145. bool IsValid() const
  146. {
  147. return Reader_->IsValid();
  148. }
  149. void Next()
  150. {
  151. Reader_->Next();
  152. ++ReadRowCount_;
  153. RowState_ = ERowState::None;
  154. }
  155. bool IsEndOfStream()
  156. {
  157. return Reader_->IsEndOfStream();
  158. }
  159. bool IsRawReaderExhausted()
  160. {
  161. return Reader_->IsRawReaderExhausted();
  162. }
  163. ui32 GetTableIndex() const
  164. {
  165. return Reader_->GetTableIndex();
  166. }
  167. ui32 GetRangeIndex() const
  168. {
  169. return Reader_->GetRangeIndex();
  170. }
  171. ui64 GetRowIndex() const
  172. {
  173. return Reader_->GetRowIndex();
  174. }
  175. i64 GetTabletIndex() const
  176. {
  177. return Reader_->GetTabletIndex();
  178. }
  179. protected:
  180. template <typename TCacher, typename TCacheGetter>
  181. const auto& DoGetRowCached(TCacher cacher, TCacheGetter cacheGetter) const
  182. {
  183. switch (RowState_) {
  184. case ERowState::None:
  185. cacher();
  186. RowState_ = ERowState::Cached;
  187. break;
  188. case ERowState::Cached:
  189. break;
  190. case ERowState::MovedOut:
  191. ythrow yexception() << "Row is already moved";
  192. }
  193. return *cacheGetter();
  194. }
  195. template <typename U, typename TMover, typename TCacheMover>
  196. void DoMoveRowCached(U* result, TMover mover, TCacheMover cacheMover)
  197. {
  198. Y_ABORT_UNLESS(result);
  199. switch (RowState_) {
  200. case ERowState::None:
  201. mover(result);
  202. break;
  203. case ERowState::Cached:
  204. cacheMover(result);
  205. break;
  206. case ERowState::MovedOut:
  207. ythrow yexception() << "Row is already moved";
  208. }
  209. RowState_ = ERowState::MovedOut;
  210. }
  211. private:
  212. enum class ERowState
  213. {
  214. None,
  215. Cached,
  216. MovedOut,
  217. };
  218. protected:
  219. ::TIntrusivePtr<IReaderImpl> Reader_;
  220. private:
  221. ui64 ReadRowCount_ = 0;
  222. mutable ERowState RowState_ = ERowState::None;
  223. };
  224. template <class T>
  225. class TSimpleTableReader
  226. : public TTableReaderBase<T>
  227. {
  228. public:
  229. using TBase = TTableReaderBase<T>;
  230. using typename TBase::TRowType;
  231. using TBase::TBase;
  232. const TRowType& GetRow() const
  233. {
  234. // Caching is implemented in underlying reader.
  235. return TBase::DoGetRowCached(
  236. /* cacher */ [&] {},
  237. /* cacheGetter */ [&] {
  238. return &Reader_->GetRow();
  239. });
  240. }
  241. void MoveRow(TRowType* result)
  242. {
  243. // Caching is implemented in underlying reader.
  244. TBase::DoMoveRowCached(
  245. result,
  246. /* mover */ [&] (TRowType* result) {
  247. Reader_->MoveRow(result);
  248. },
  249. /* cacheMover */ [&] (TRowType* result) {
  250. Reader_->MoveRow(result);
  251. });
  252. }
  253. TRowType MoveRow()
  254. {
  255. TRowType result;
  256. MoveRow(&result);
  257. return result;
  258. }
  259. private:
  260. using TBase::Reader_;
  261. };
  262. ////////////////////////////////////////////////////////////////////////////////
  263. } // namespace NDetail
  264. template <>
  265. class TTableReader<TNode>
  266. : public NDetail::TSimpleTableReader<TNode>
  267. {
  268. using TSimpleTableReader<TNode>::TSimpleTableReader;
  269. };
  270. template <>
  271. class TTableReader<TYaMRRow>
  272. : public NDetail::TSimpleTableReader<TYaMRRow>
  273. {
  274. using TSimpleTableReader<TYaMRRow>::TSimpleTableReader;
  275. };
  276. template <>
  277. class TTableReader<Message>
  278. : public NDetail::TTableReaderBase<Message>
  279. {
  280. public:
  281. using TBase = NDetail::TTableReaderBase<Message>;
  282. using TBase::TBase;
  283. template <class U>
  284. const U& GetRow() const
  285. {
  286. static_assert(TIsBaseOf<Message, U>::Value);
  287. return TBase::DoGetRowCached(
  288. /* cacher */ [&] {
  289. CachedRow_.Reset(new U);
  290. Reader_->ReadRow(CachedRow_.Get());
  291. },
  292. /* cacheGetter */ [&] {
  293. auto result = dynamic_cast<const U*>(CachedRow_.Get());
  294. Y_ABORT_UNLESS(result);
  295. return result;
  296. });
  297. }
  298. template <class U>
  299. void MoveRow(U* result)
  300. {
  301. static_assert(TIsBaseOf<Message, U>::Value);
  302. TBase::DoMoveRowCached(
  303. result,
  304. /* mover */ [&] (U* result) {
  305. Reader_->ReadRow(result);
  306. },
  307. /* cacheMover */ [&] (U* result) {
  308. auto cast = dynamic_cast<U*>(CachedRow_.Get());
  309. Y_ABORT_UNLESS(cast);
  310. result->Swap(cast);
  311. });
  312. }
  313. template <class U>
  314. U MoveRow()
  315. {
  316. static_assert(TIsBaseOf<Message, U>::Value);
  317. U result;
  318. MoveRow(&result);
  319. return result;
  320. }
  321. ::TIntrusivePtr<IProtoReaderImpl> GetReaderImpl() const
  322. {
  323. return Reader_;
  324. }
  325. private:
  326. using TBase::Reader_;
  327. mutable THolder<Message> CachedRow_;
  328. };
  329. template<class... TProtoRowTypes>
  330. class TTableReader<TProtoOneOf<TProtoRowTypes...>>
  331. : public NDetail::TTableReaderBase<TProtoOneOf<TProtoRowTypes...>>
  332. {
  333. public:
  334. using TBase = NDetail::TTableReaderBase<TProtoOneOf<TProtoRowTypes...>>;
  335. using TBase::TBase;
  336. template <class U>
  337. const U& GetRow() const
  338. {
  339. AssertIsOneOf<U>();
  340. return TBase::DoGetRowCached(
  341. /* cacher */ [&] {
  342. Reader_->ReadRow(&std::get<U>(CachedRows_));
  343. CachedIndex_ = NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value;
  344. },
  345. /* cacheGetter */ [&] {
  346. return &std::get<U>(CachedRows_);
  347. });
  348. }
  349. template <class U>
  350. void MoveRow(U* result)
  351. {
  352. AssertIsOneOf<U>();
  353. return TBase::DoMoveRowCached(
  354. result,
  355. /* mover */ [&] (U* result) {
  356. Reader_->ReadRow(result);
  357. },
  358. /* cacheMover */ [&] (U* result) {
  359. Y_ABORT_UNLESS((NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value) == CachedIndex_);
  360. *result = std::move(std::get<U>(CachedRows_));
  361. });
  362. }
  363. template <class U>
  364. U MoveRow()
  365. {
  366. U result;
  367. MoveRow(&result);
  368. return result;
  369. }
  370. ::TIntrusivePtr<IProtoReaderImpl> GetReaderImpl() const
  371. {
  372. return Reader_;
  373. }
  374. private:
  375. using TBase::Reader_;
  376. // std::variant could also be used here, but std::tuple leads to better performance
  377. // because of deallocations that std::variant has to do
  378. mutable std::tuple<TProtoRowTypes...> CachedRows_;
  379. mutable int CachedIndex_;
  380. template <class U>
  381. static constexpr void AssertIsOneOf()
  382. {
  383. static_assert(
  384. (std::is_same<U, TProtoRowTypes>::value || ...),
  385. "Template parameter must be one of TProtoOneOf template parameter");
  386. }
  387. };
  388. template <class T>
  389. class TTableReader<T, std::enable_if_t<TIsBaseOf<Message, T>::Value>>
  390. : public TTableReader<TProtoOneOf<T>>
  391. {
  392. public:
  393. using TRowType = T;
  394. using TBase = TTableReader<TProtoOneOf<T>>;
  395. using TBase::TBase;
  396. const T& GetRow() const
  397. {
  398. return TBase::template GetRow<T>();
  399. }
  400. void MoveRow(T* result)
  401. {
  402. TBase::template MoveRow<T>(result);
  403. }
  404. T MoveRow()
  405. {
  406. return TBase::template MoveRow<T>();
  407. }
  408. };
  409. template<class... TSkiffRowTypes>
  410. class TTableReader<TSkiffRowOneOf<TSkiffRowTypes...>>
  411. : public NDetail::TTableReaderBase<TSkiffRowOneOf<TSkiffRowTypes...>>
  412. {
  413. public:
  414. using TBase = NDetail::TTableReaderBase<TSkiffRowOneOf<TSkiffRowTypes...>>;
  415. using TBase::TBase;
  416. explicit TTableReader(::TIntrusivePtr<typename TBase::IReaderImpl> reader, const TMaybe<TSkiffRowHints>& hints)
  417. : TBase(reader)
  418. , Parsers_({(CreateSkiffParser<TSkiffRowTypes>(&std::get<TSkiffRowTypes>(CachedRows_), hints))...})
  419. { }
  420. template <class U>
  421. const U& GetRow() const
  422. {
  423. AssertIsOneOf<U>();
  424. return TBase::DoGetRowCached(
  425. /* cacher */ [&] {
  426. auto index = NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value;
  427. Reader_->ReadRow(Parsers_[index]);
  428. CachedIndex_ = index;
  429. },
  430. /* cacheGetter */ [&] {
  431. return &std::get<U>(CachedRows_);
  432. });
  433. }
  434. template <class U>
  435. void MoveRow(U* result)
  436. {
  437. AssertIsOneOf<U>();
  438. return TBase::DoMoveRowCached(
  439. result,
  440. /* mover */ [&] (U* result) {
  441. auto index = NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value;
  442. Reader_->ReadRow(Parsers_[index]);
  443. *result = std::move(std::get<U>(CachedRows_));
  444. },
  445. /* cacheMover */ [&] (U* result) {
  446. Y_ABORT_UNLESS((NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value) == CachedIndex_);
  447. *result = std::move(std::get<U>(CachedRows_));
  448. });
  449. }
  450. template <class U>
  451. U MoveRow()
  452. {
  453. U result;
  454. MoveRow(&result);
  455. return result;
  456. }
  457. ::TIntrusivePtr<ISkiffRowReaderImpl> GetReaderImpl() const
  458. {
  459. return Reader_;
  460. }
  461. private:
  462. using TBase::Reader_;
  463. // std::variant could also be used here, but std::tuple leads to better performance
  464. // because of deallocations that std::variant has to do
  465. mutable std::tuple<TSkiffRowTypes...> CachedRows_;
  466. mutable std::vector<ISkiffRowParserPtr> Parsers_;
  467. mutable int CachedIndex_;
  468. template <class U>
  469. static constexpr void AssertIsOneOf()
  470. {
  471. static_assert(
  472. (std::is_same<U, TSkiffRowTypes>::value || ...),
  473. "Template parameter must be one of TSkiffRowOneOf template parameter");
  474. }
  475. };
  476. template <class T>
  477. class TTableReader<T, std::enable_if_t<TIsSkiffRow<T>::value>>
  478. : public TTableReader<TSkiffRowOneOf<T>>
  479. {
  480. public:
  481. using TRowType = T;
  482. using TBase = TTableReader<TSkiffRowOneOf<T>>;
  483. using TBase::TBase;
  484. const T& GetRow()
  485. {
  486. return TBase::template GetRow<T>();
  487. }
  488. void MoveRow(T* result)
  489. {
  490. TBase::template MoveRow<T>(result);
  491. }
  492. T MoveRow()
  493. {
  494. return TBase::template MoveRow<T>();
  495. }
  496. };
  497. template <>
  498. inline TTableReaderPtr<TNode> IIOClient::CreateTableReader<TNode>(
  499. const TRichYPath& path, const TTableReaderOptions& options)
  500. {
  501. return new TTableReader<TNode>(CreateNodeReader(path, options));
  502. }
  503. template <>
  504. inline TTableReaderPtr<TYaMRRow> IIOClient::CreateTableReader<TYaMRRow>(
  505. const TRichYPath& path, const TTableReaderOptions& options)
  506. {
  507. return new TTableReader<TYaMRRow>(CreateYaMRReader(path, options));
  508. }
  509. template <class T, class = std::enable_if_t<TIsBaseOf<Message, T>::Value>>
  510. struct TReaderCreator
  511. {
  512. static TTableReaderPtr<T> Create(::TIntrusivePtr<IProtoReaderImpl> reader)
  513. {
  514. return new TTableReader<T>(reader);
  515. }
  516. };
  517. template <class T>
  518. inline TTableReaderPtr<T> IIOClient::CreateTableReader(
  519. const TRichYPath& path, const TTableReaderOptions& options)
  520. {
  521. if constexpr (TIsBaseOf<Message, T>::Value) {
  522. TAutoPtr<T> prototype(new T);
  523. return new TTableReader<T>(CreateProtoReader(path, options, prototype.Get()));
  524. } else if constexpr (TIsSkiffRow<T>::value) {
  525. const auto& hints = options.FormatHints_ ? options.FormatHints_->SkiffRowHints_ : Nothing();
  526. auto schema = GetSkiffSchema<T>(hints);
  527. auto skipper = CreateSkiffSkipper<T>(hints);
  528. return new TTableReader<T>(CreateSkiffRowReader(path, options, skipper, schema), hints);
  529. } else {
  530. static_assert(TDependentFalse<T>, "Unsupported type for table reader");
  531. }
  532. }
  533. ////////////////////////////////////////////////////////////////////////////////
  534. template <typename T>
  535. TTableReaderPtr<T> CreateTableReader(
  536. IInputStream* stream,
  537. const TTableReaderOptions& options)
  538. {
  539. return TReaderCreator<T>::Create(NDetail::CreateProtoReader(stream, options, T::descriptor()));
  540. }
  541. template <class... Ts>
  542. TTableReaderPtr<typename NDetail::TProtoOneOfUnique<Ts...>::TType> CreateProtoMultiTableReader(
  543. IInputStream* stream,
  544. const TTableReaderOptions& options)
  545. {
  546. return new TTableReader<typename NDetail::TProtoOneOfUnique<Ts...>::TType>(
  547. NDetail::CreateProtoReader(stream, options, {Ts::descriptor()...}));
  548. }
  549. template <class T>
  550. TTableReaderPtr<T> CreateProtoMultiTableReader(
  551. IInputStream* stream,
  552. int tableCount,
  553. const TTableReaderOptions& options)
  554. {
  555. static_assert(TIsBaseOf<::google::protobuf::Message, T>::Value);
  556. TVector<const ::google::protobuf::Descriptor*> descriptors(tableCount, T::descriptor());
  557. return new TTableReader<T>(NDetail::CreateProtoReader(stream, options, std::move(descriptors)));
  558. }
  559. ////////////////////////////////////////////////////////////////////////////////
  560. template <class T>
  561. class TTableRangesReader<T>
  562. : public TThrRefBase
  563. {
  564. public:
  565. using TRowType = T;
  566. private:
  567. using TReaderImpl = typename TRowTraits<TRowType>::IReaderImpl;
  568. public:
  569. TTableRangesReader(::TIntrusivePtr<TReaderImpl> readerImpl)
  570. : ReaderImpl_(readerImpl)
  571. , Reader_(MakeIntrusive<TTableReader<TRowType>>(readerImpl))
  572. , IsValid_(Reader_->IsValid())
  573. { }
  574. TTableReader<T>& GetRange()
  575. {
  576. return *Reader_;
  577. }
  578. bool IsValid() const
  579. {
  580. return IsValid_;
  581. }
  582. void Next()
  583. {
  584. ReaderImpl_->NextKey();
  585. if ((IsValid_ = Reader_->IsValid())) {
  586. Reader_->Next();
  587. }
  588. }
  589. private:
  590. ::TIntrusivePtr<TReaderImpl> ReaderImpl_;
  591. ::TIntrusivePtr<TTableReader<TRowType>> Reader_;
  592. bool IsValid_;
  593. };
  594. ////////////////////////////////////////////////////////////////////////////////
  595. template <typename T>
  596. struct IWriterImplBase
  597. : public TThrRefBase
  598. {
  599. virtual void AddRow(const T& row, size_t tableIndex) = 0;
  600. virtual void AddRow(const T& row, size_t tableIndex, size_t /*rowWeight*/)
  601. {
  602. AddRow(row, tableIndex);
  603. }
  604. virtual void AddRow(T&& row, size_t tableIndex) = 0;
  605. virtual void AddRow(T&& row, size_t tableIndex, size_t /*rowWeight*/)
  606. {
  607. AddRow(std::move(row), tableIndex);
  608. }
  609. virtual void AddRowBatch(const TVector<T>& rowBatch, size_t tableIndex, size_t rowBatchWeight = 0)
  610. {
  611. for (const auto& row : rowBatch) {
  612. AddRow(row, tableIndex, rowBatchWeight / rowBatch.size());
  613. }
  614. }
  615. virtual void AddRowBatch(TVector<T>&& rowBatch, size_t tableIndex, size_t rowBatchWeight = 0)
  616. {
  617. auto rowBatchSize = rowBatch.size();
  618. for (auto&& row : std::move(rowBatch)) {
  619. AddRow(std::move(row), tableIndex, rowBatchWeight / rowBatchSize);
  620. }
  621. }
  622. virtual size_t GetBufferMemoryUsage() const
  623. {
  624. return 0;
  625. }
  626. virtual size_t GetTableCount() const = 0;
  627. virtual void FinishTable(size_t tableIndex) = 0;
  628. virtual void Abort()
  629. { }
  630. };
  631. struct INodeWriterImpl
  632. : public IWriterImplBase<TNode>
  633. {
  634. };
  635. struct IYaMRWriterImpl
  636. : public IWriterImplBase<TYaMRRow>
  637. {
  638. };
  639. struct IProtoWriterImpl
  640. : public IWriterImplBase<Message>
  641. {
  642. };
  643. ////////////////////////////////////////////////////////////////////////////////
  644. template <class T>
  645. class TTableWriterBase
  646. : public TThrRefBase
  647. {
  648. public:
  649. using TRowType = T;
  650. using IWriterImpl = typename TRowTraits<T>::IWriterImpl;
  651. explicit TTableWriterBase(::TIntrusivePtr<IWriterImpl> writer)
  652. : Writer_(writer)
  653. , Locks_(MakeAtomicShared<TVector<TAdaptiveLock>>(writer->GetTableCount()))
  654. { }
  655. ~TTableWriterBase() override
  656. {
  657. if (Locks_.RefCount() == 1) {
  658. NDetail::FinishOrDie(this, "TTableWriterBase");
  659. }
  660. }
  661. void Abort()
  662. {
  663. Writer_->Abort();
  664. }
  665. void AddRow(const T& row, size_t tableIndex = 0, size_t rowWeight = 0)
  666. {
  667. DoAddRow<T>(row, tableIndex, rowWeight);
  668. }
  669. void AddRow(T&& row, size_t tableIndex = 0, size_t rowWeight = 0)
  670. {
  671. DoAddRow<T>(std::move(row), tableIndex, rowWeight);
  672. }
  673. void AddRowBatch(const TVector<T>& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0)
  674. {
  675. DoAddRowBatch<T>(rowBatch, tableIndex, rowBatchWeight);
  676. }
  677. void AddRowBatch(TVector<T>&& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0)
  678. {
  679. DoAddRowBatch<T>(std::move(rowBatch), tableIndex, rowBatchWeight);
  680. }
  681. void Finish()
  682. {
  683. for (size_t i = 0; i < Locks_->size(); ++i) {
  684. auto guard = Guard((*Locks_)[i]);
  685. Writer_->FinishTable(i);
  686. }
  687. }
  688. size_t GetBufferMemoryUsage() const
  689. {
  690. return DoGetBufferMemoryUsage();
  691. }
  692. protected:
  693. template <class U>
  694. void DoAddRow(const U& row, size_t tableIndex = 0, size_t rowWeight = 0)
  695. {
  696. if (tableIndex >= Locks_->size()) {
  697. ythrow TIOException() <<
  698. "Table index " << tableIndex <<
  699. " is out of range [0, " << Locks_->size() << ")";
  700. }
  701. auto guard = Guard((*Locks_)[tableIndex]);
  702. Writer_->AddRow(row, tableIndex, rowWeight);
  703. }
  704. template <class U>
  705. void DoAddRow(U&& row, size_t tableIndex = 0, size_t rowWeight = 0)
  706. {
  707. if (tableIndex >= Locks_->size()) {
  708. ythrow TIOException() <<
  709. "Table index " << tableIndex <<
  710. " is out of range [0, " << Locks_->size() << ")";
  711. }
  712. auto guard = Guard((*Locks_)[tableIndex]);
  713. Writer_->AddRow(std::move(row), tableIndex, rowWeight);
  714. }
  715. template <class U>
  716. void DoAddRowBatch(const TVector<U>& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0)
  717. {
  718. if (tableIndex >= Locks_->size()) {
  719. ythrow TIOException() <<
  720. "Table index " << tableIndex <<
  721. " is out of range [0, " << Locks_->size() << ")";
  722. }
  723. auto guard = Guard((*Locks_)[tableIndex]);
  724. Writer_->AddRowBatch(rowBatch, tableIndex, rowBatchWeight);
  725. }
  726. template <class U>
  727. void DoAddRowBatch(TVector<U>&& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0)
  728. {
  729. if (tableIndex >= Locks_->size()) {
  730. ythrow TIOException() <<
  731. "Table index " << tableIndex <<
  732. " is out of range [0, " << Locks_->size() << ")";
  733. }
  734. auto guard = Guard((*Locks_)[tableIndex]);
  735. Writer_->AddRowBatch(std::move(rowBatch), tableIndex, rowBatchWeight);
  736. }
  737. size_t DoGetBufferMemoryUsage() const
  738. {
  739. return Writer_->GetBufferMemoryUsage();
  740. }
  741. ::TIntrusivePtr<IWriterImpl> GetWriterImpl()
  742. {
  743. return Writer_;
  744. }
  745. private:
  746. ::TIntrusivePtr<IWriterImpl> Writer_;
  747. TAtomicSharedPtr<TVector<TAdaptiveLock>> Locks_;
  748. };
  749. template <>
  750. class TTableWriter<TNode>
  751. : public TTableWriterBase<TNode>
  752. {
  753. public:
  754. using TBase = TTableWriterBase<TNode>;
  755. explicit TTableWriter(::TIntrusivePtr<IWriterImpl> writer)
  756. : TBase(writer)
  757. { }
  758. };
  759. template <>
  760. class TTableWriter<TYaMRRow>
  761. : public TTableWriterBase<TYaMRRow>
  762. {
  763. public:
  764. using TBase = TTableWriterBase<TYaMRRow>;
  765. explicit TTableWriter(::TIntrusivePtr<IWriterImpl> writer)
  766. : TBase(writer)
  767. { }
  768. };
  769. template <>
  770. class TTableWriter<Message>
  771. : public TTableWriterBase<Message>
  772. {
  773. public:
  774. using TBase = TTableWriterBase<Message>;
  775. explicit TTableWriter(::TIntrusivePtr<IWriterImpl> writer)
  776. : TBase(writer)
  777. { }
  778. template <class U, std::enable_if_t<std::is_base_of<Message, U>::value>* = nullptr>
  779. void AddRow(const U& row, size_t tableIndex = 0, size_t rowWeight = 0)
  780. {
  781. TBase::AddRow(row, tableIndex, rowWeight);
  782. }
  783. template <class U, std::enable_if_t<std::is_base_of<Message, U>::value>* = nullptr>
  784. void AddRowBatch(const TVector<U>& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0)
  785. {
  786. for (const auto& row : rowBatch) {
  787. AddRow(row, tableIndex, rowBatchWeight / rowBatch.size());
  788. }
  789. }
  790. };
  791. template <class T>
  792. class TTableWriter<T, std::enable_if_t<TIsBaseOf<Message, T>::Value>>
  793. : public TTableWriter<Message>
  794. {
  795. public:
  796. using TRowType = T;
  797. using TBase = TTableWriter<Message>;
  798. explicit TTableWriter(::TIntrusivePtr<IWriterImpl> writer)
  799. : TBase(writer)
  800. { }
  801. void AddRow(const T& row, size_t tableIndex = 0, size_t rowWeight = 0)
  802. {
  803. TBase::AddRow<T>(row, tableIndex, rowWeight);
  804. }
  805. void AddRowBatch(const TVector<T>& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0)
  806. {
  807. TBase::AddRowBatch<T>(rowBatch, tableIndex, rowBatchWeight);
  808. }
  809. };
  810. template <>
  811. inline TTableWriterPtr<TNode> IIOClient::CreateTableWriter<TNode>(
  812. const TRichYPath& path, const TTableWriterOptions& options)
  813. {
  814. return new TTableWriter<TNode>(CreateNodeWriter(path, options));
  815. }
  816. template <>
  817. inline TTableWriterPtr<TYaMRRow> IIOClient::CreateTableWriter<TYaMRRow>(
  818. const TRichYPath& path, const TTableWriterOptions& options)
  819. {
  820. return new TTableWriter<TYaMRRow>(CreateYaMRWriter(path, options));
  821. }
  822. template <class T>
  823. inline TTableWriterPtr<T> IIOClient::CreateTableWriter(
  824. const TRichYPath& path, const TTableWriterOptions& options)
  825. {
  826. if constexpr (TIsBaseOf<Message, T>::Value) {
  827. TAutoPtr<T> prototype(new T);
  828. return new TTableWriter<T>(CreateProtoWriter(path, options, prototype.Get()));
  829. } else {
  830. static_assert(TDependentFalse<T>, "Unsupported type for table writer");
  831. }
  832. }
  833. ////////////////////////////////////////////////////////////////////////////////
  834. template <typename T>
  835. TTableReaderPtr<T> CreateConcreteProtobufReader(TTableReader<Message>* reader)
  836. {
  837. static_assert(std::is_base_of_v<Message, T>, "T must be a protobuf type (either Message or its descendant)");
  838. Y_ENSURE(reader, "reader must be non-null");
  839. return ::MakeIntrusive<TTableReader<T>>(reader->GetReaderImpl());
  840. }
  841. template <typename T>
  842. TTableReaderPtr<T> CreateConcreteProtobufReader(const TTableReaderPtr<Message>& reader)
  843. {
  844. Y_ENSURE(reader, "reader must be non-null");
  845. return CreateConcreteProtobufReader<T>(reader.Get());
  846. }
  847. template <typename T>
  848. TTableReaderPtr<Message> CreateGenericProtobufReader(TTableReader<T>* reader)
  849. {
  850. static_assert(std::is_base_of_v<Message, T>, "T must be a protobuf type (either Message or its descendant)");
  851. Y_ENSURE(reader, "reader must be non-null");
  852. return ::MakeIntrusive<TTableReader<Message>>(reader->GetReaderImpl());
  853. }
  854. template <typename T>
  855. TTableReaderPtr<Message> CreateGenericProtobufReader(const TTableReaderPtr<T>& reader)
  856. {
  857. Y_ENSURE(reader, "reader must be non-null");
  858. return CreateGenericProtobufReader(reader.Get());
  859. }
  860. ////////////////////////////////////////////////////////////////////////////////
  861. } // namespace NYT