common.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707
  1. #include "common.h"
  2. #include "errors.h"
  3. #include "format.h"
  4. #include "serialize.h"
  5. #include "fluent.h"
  6. #include <yt/yt_proto/yt/formats/extension.pb.h>
  7. #include <library/cpp/yson/node/node_builder.h>
  8. #include <library/cpp/yson/node/node_io.h>
  9. #include <library/cpp/type_info/type.h>
  10. #include <util/generic/xrange.h>
  11. namespace NYT {
  12. using ::google::protobuf::Descriptor;
  13. ////////////////////////////////////////////////////////////////////////////////
  14. TSortColumn::TSortColumn(TStringBuf name, ESortOrder sortOrder)
  15. : Name_(name)
  16. , SortOrder_(sortOrder)
  17. { }
  18. TSortColumn::TSortColumn(const TString& name, ESortOrder sortOrder)
  19. : TSortColumn(static_cast<TStringBuf>(name), sortOrder)
  20. { }
  21. TSortColumn::TSortColumn(const char* name, ESortOrder sortOrder)
  22. : TSortColumn(static_cast<TStringBuf>(name), sortOrder)
  23. { }
  24. const TSortColumn& TSortColumn::EnsureAscending() const
  25. {
  26. Y_ENSURE(SortOrder() == ESortOrder::SO_ASCENDING);
  27. return *this;
  28. }
  29. TNode TSortColumn::ToNode() const
  30. {
  31. return BuildYsonNodeFluently().Value(*this);
  32. }
  33. ////////////////////////////////////////////////////////////////////////////////
  34. // Below lie backward compatibility methods.
  35. ////////////////////////////////////////////////////////////////////////////////
  36. TSortColumn& TSortColumn::operator = (TStringBuf name)
  37. {
  38. EnsureAscending();
  39. Name_ = name;
  40. return *this;
  41. }
  42. TSortColumn& TSortColumn::operator = (const TString& name)
  43. {
  44. return (*this = static_cast<TStringBuf>(name));
  45. }
  46. TSortColumn& TSortColumn::operator = (const char* name)
  47. {
  48. return (*this = static_cast<TStringBuf>(name));
  49. }
  50. bool TSortColumn::operator == (TStringBuf rhsName) const
  51. {
  52. EnsureAscending();
  53. return Name_ == rhsName;
  54. }
  55. bool TSortColumn::operator == (const TString& rhsName) const
  56. {
  57. return *this == static_cast<TStringBuf>(rhsName);
  58. }
  59. bool TSortColumn::operator == (const char* rhsName) const
  60. {
  61. return *this == static_cast<TStringBuf>(rhsName);
  62. }
  63. TSortColumn::operator TStringBuf() const
  64. {
  65. EnsureAscending();
  66. return Name_;
  67. }
  68. TSortColumn::operator TString() const
  69. {
  70. return TString(static_cast<TStringBuf>(*this));
  71. }
  72. TSortColumn::operator std::string() const
  73. {
  74. EnsureAscending();
  75. return static_cast<std::string>(Name_);
  76. }
  77. ////////////////////////////////////////////////////////////////////////////////
  78. TSortColumns::TSortColumns()
  79. { }
  80. TSortColumns::TSortColumns(const TVector<TString>& names)
  81. {
  82. Parts_.assign(names.begin(), names.end());
  83. }
  84. TSortColumns::TSortColumns(const TColumnNames& names)
  85. : TSortColumns(names.Parts_)
  86. { }
  87. TSortColumns::operator TColumnNames() const
  88. {
  89. return TColumnNames(EnsureAscending().GetNames());
  90. }
  91. const TSortColumns& TSortColumns::EnsureAscending() const
  92. {
  93. for (const auto& sortColumn : Parts_) {
  94. sortColumn.EnsureAscending();
  95. }
  96. return *this;
  97. }
  98. TVector<TString> TSortColumns::GetNames() const
  99. {
  100. TVector<TString> names;
  101. names.reserve(Parts_.size());
  102. for (const auto& sortColumn : Parts_) {
  103. names.push_back(sortColumn.Name());
  104. }
  105. return names;
  106. }
  107. ////////////////////////////////////////////////////////////////////////////////
  108. static NTi::TTypePtr OldTypeToTypeV3(EValueType type)
  109. {
  110. switch (type) {
  111. case VT_INT64:
  112. return NTi::Int64();
  113. case VT_UINT64:
  114. return NTi::Uint64();
  115. case VT_DOUBLE:
  116. return NTi::Double();
  117. case VT_BOOLEAN:
  118. return NTi::Bool();
  119. case VT_STRING:
  120. return NTi::String();
  121. case VT_ANY:
  122. return NTi::Yson();
  123. case VT_INT8:
  124. return NTi::Int8();
  125. case VT_INT16:
  126. return NTi::Int16();
  127. case VT_INT32:
  128. return NTi::Int32();
  129. case VT_UINT8:
  130. return NTi::Uint8();
  131. case VT_UINT16:
  132. return NTi::Uint16();
  133. case VT_UINT32:
  134. return NTi::Uint32();
  135. case VT_UTF8:
  136. return NTi::Utf8();
  137. case VT_NULL:
  138. return NTi::Null();
  139. case VT_VOID:
  140. return NTi::Void();
  141. case VT_DATE:
  142. return NTi::Date();
  143. case VT_DATETIME:
  144. return NTi::Datetime();
  145. case VT_TIMESTAMP:
  146. return NTi::Timestamp();
  147. case VT_INTERVAL:
  148. return NTi::Interval();
  149. case VT_FLOAT:
  150. return NTi::Float();
  151. case VT_JSON:
  152. return NTi::Json();
  153. case VT_DATE32:
  154. return NTi::Date32();
  155. case VT_DATETIME64:
  156. return NTi::Datetime64();
  157. case VT_TIMESTAMP64:
  158. return NTi::Timestamp64();
  159. case VT_INTERVAL64:
  160. return NTi::Interval64();
  161. case VT_UUID:
  162. return NTi::Uuid();
  163. }
  164. }
  165. static std::pair<EValueType, bool> Simplify(const NTi::TTypePtr& type)
  166. {
  167. using namespace NTi;
  168. const auto typeName = type->GetTypeName();
  169. switch (typeName) {
  170. case ETypeName::Bool:
  171. return {VT_BOOLEAN, true};
  172. case ETypeName::Int8:
  173. return {VT_INT8, true};
  174. case ETypeName::Int16:
  175. return {VT_INT16, true};
  176. case ETypeName::Int32:
  177. return {VT_INT32, true};
  178. case ETypeName::Int64:
  179. return {VT_INT64, true};
  180. case ETypeName::Uint8:
  181. return {VT_UINT8, true};
  182. case ETypeName::Uint16:
  183. return {VT_UINT16, true};
  184. case ETypeName::Uint32:
  185. return {VT_UINT32, true};
  186. case ETypeName::Uint64:
  187. return {VT_UINT64, true};
  188. case ETypeName::Float:
  189. return {VT_FLOAT, true};
  190. case ETypeName::Double:
  191. return {VT_DOUBLE, true};
  192. case ETypeName::String:
  193. return {VT_STRING, true};
  194. case ETypeName::Utf8:
  195. return {VT_UTF8, true};
  196. case ETypeName::Date:
  197. return {VT_DATE, true};
  198. case ETypeName::Datetime:
  199. return {VT_DATETIME, true};
  200. case ETypeName::Timestamp:
  201. return {VT_TIMESTAMP, true};
  202. case ETypeName::Interval:
  203. return {VT_INTERVAL, true};
  204. case ETypeName::TzDate:
  205. case ETypeName::TzDatetime:
  206. case ETypeName::TzTimestamp:
  207. break;
  208. case ETypeName::Json:
  209. return {VT_JSON, true};
  210. case ETypeName::Decimal:
  211. return {VT_STRING, true};
  212. case ETypeName::Uuid:
  213. return {VT_UUID, true};
  214. case ETypeName::Yson:
  215. return {VT_ANY, true};
  216. case ETypeName::Date32:
  217. return {VT_DATE32, true};
  218. case ETypeName::Datetime64:
  219. return {VT_DATETIME64, true};
  220. case ETypeName::Timestamp64:
  221. return {VT_TIMESTAMP64, true};
  222. case ETypeName::Interval64:
  223. return {VT_INTERVAL64, true};
  224. case ETypeName::Void:
  225. return {VT_VOID, false};
  226. case ETypeName::Null:
  227. return {VT_NULL, false};
  228. case ETypeName::Optional:
  229. {
  230. auto itemType = type->AsOptional()->GetItemType();
  231. if (itemType->IsPrimitive()) {
  232. auto simplified = Simplify(itemType->AsPrimitive());
  233. if (simplified.second) {
  234. simplified.second = false;
  235. return simplified;
  236. }
  237. }
  238. return {VT_ANY, false};
  239. }
  240. case ETypeName::List:
  241. return {VT_ANY, true};
  242. case ETypeName::Dict:
  243. return {VT_ANY, true};
  244. case ETypeName::Struct:
  245. return {VT_ANY, true};
  246. case ETypeName::Tuple:
  247. return {VT_ANY, true};
  248. case ETypeName::Variant:
  249. return {VT_ANY, true};
  250. case ETypeName::Tagged:
  251. return Simplify(type->AsTagged()->GetItemType());
  252. }
  253. ythrow TApiUsageError() << "Unsupported type: " << typeName;
  254. }
  255. NTi::TTypePtr ToTypeV3(EValueType type, bool required)
  256. {
  257. auto typeV3 = OldTypeToTypeV3(type);
  258. if (!Simplify(typeV3).second) {
  259. if (required) {
  260. ythrow TApiUsageError() << "type: " << type << " cannot be required";
  261. } else {
  262. return typeV3;
  263. }
  264. }
  265. if (required) {
  266. return typeV3;
  267. } else {
  268. return NTi::Optional(typeV3);
  269. }
  270. }
  271. TColumnSchema::TColumnSchema()
  272. : TypeV3_(NTi::Optional(NTi::Int64()))
  273. { }
  274. EValueType TColumnSchema::Type() const
  275. {
  276. return Simplify(TypeV3_).first;
  277. }
  278. TColumnSchema& TColumnSchema::Type(EValueType type) &
  279. {
  280. return Type(ToTypeV3(type, false));
  281. }
  282. TColumnSchema TColumnSchema::Type(EValueType type) &&
  283. {
  284. return Type(ToTypeV3(type, false));
  285. }
  286. TColumnSchema& TColumnSchema::Type(const NTi::TTypePtr& type) &
  287. {
  288. Y_ABORT_UNLESS(type.Get(), "Cannot create column schema with nullptr type");
  289. TypeV3_ = type;
  290. RawTypeV3_ = {};
  291. return *this;
  292. }
  293. TColumnSchema TColumnSchema::Type(const NTi::TTypePtr& type) &&
  294. {
  295. Y_ABORT_UNLESS(type.Get(), "Cannot create column schema with nullptr type");
  296. TypeV3_ = type;
  297. RawTypeV3_ = {};
  298. return *this;
  299. }
  300. TColumnSchema& TColumnSchema::TypeV3(const NTi::TTypePtr& type) &
  301. {
  302. return Type(type);
  303. }
  304. TColumnSchema TColumnSchema::TypeV3(const NTi::TTypePtr& type) &&
  305. {
  306. return Type(type);
  307. }
  308. NTi::TTypePtr TColumnSchema::TypeV3() const
  309. {
  310. return TypeV3_;
  311. }
  312. bool TColumnSchema::Required() const
  313. {
  314. return Simplify(TypeV3_).second;
  315. }
  316. TColumnSchema& TColumnSchema::Type(EValueType type, bool required) &
  317. {
  318. return Type(ToTypeV3(type, required));
  319. }
  320. TColumnSchema TColumnSchema::Type(EValueType type, bool required) &&
  321. {
  322. return Type(ToTypeV3(type, required));
  323. }
  324. const TMaybe<TNode>& TColumnSchema::RawTypeV3() const
  325. {
  326. return RawTypeV3_;
  327. }
  328. TColumnSchema& TColumnSchema::RawTypeV3(TNode rawTypeV3) &
  329. {
  330. RawTypeV3_ = std::move(rawTypeV3);
  331. TypeV3_ = nullptr;
  332. return *this;
  333. }
  334. TColumnSchema TColumnSchema::RawTypeV3(TNode rawTypeV3) &&
  335. {
  336. RawTypeV3_ = std::move(rawTypeV3);
  337. TypeV3_ = nullptr;
  338. return *this;
  339. }
  340. bool operator==(const TColumnSchema& lhs, const TColumnSchema& rhs)
  341. {
  342. return
  343. lhs.Name() == rhs.Name() &&
  344. NTi::NEq::TStrictlyEqual()(lhs.TypeV3(), rhs.TypeV3()) &&
  345. lhs.SortOrder() == rhs.SortOrder() &&
  346. lhs.Lock() == rhs.Lock() &&
  347. lhs.Expression() == rhs.Expression() &&
  348. lhs.Aggregate() == rhs.Aggregate() &&
  349. lhs.Group() == rhs.Group();
  350. }
  351. ////////////////////////////////////////////////////////////////////////////////
  352. bool TTableSchema::Empty() const
  353. {
  354. return Columns_.empty();
  355. }
  356. TTableSchema& TTableSchema::AddColumn(const TString& name, EValueType type) &
  357. {
  358. Columns_.push_back(TColumnSchema().Name(name).Type(type));
  359. return *this;
  360. }
  361. TTableSchema TTableSchema::AddColumn(const TString& name, EValueType type) &&
  362. {
  363. return std::move(AddColumn(name, type));
  364. }
  365. TTableSchema& TTableSchema::AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) &
  366. {
  367. Columns_.push_back(TColumnSchema().Name(name).Type(type).SortOrder(sortOrder));
  368. return *this;
  369. }
  370. TTableSchema TTableSchema::AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) &&
  371. {
  372. return std::move(AddColumn(name, type, sortOrder));
  373. }
  374. TTableSchema& TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type) &
  375. {
  376. Columns_.push_back(TColumnSchema().Name(name).Type(type));
  377. return *this;
  378. }
  379. TTableSchema TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type) &&
  380. {
  381. return std::move(AddColumn(name, type));
  382. }
  383. TTableSchema& TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) &
  384. {
  385. Columns_.push_back(TColumnSchema().Name(name).Type(type).SortOrder(sortOrder));
  386. return *this;
  387. }
  388. TTableSchema TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) &&
  389. {
  390. return std::move(AddColumn(name, type, sortOrder));
  391. }
  392. TTableSchema& TTableSchema::SortBy(const TSortColumns& sortColumns) &
  393. {
  394. Y_ENSURE(sortColumns.Parts_.size() <= Columns_.size());
  395. THashMap<TString, ui64> sortColumnIndex;
  396. for (auto i: xrange(sortColumns.Parts_.size())) {
  397. Y_ENSURE(sortColumnIndex.emplace(sortColumns.Parts_[i].Name(), i).second,
  398. "Key column name '" << sortColumns.Parts_[i].Name() << "' repeats in columns list");
  399. }
  400. TVector<TColumnSchema> newColumnsSorted(sortColumns.Parts_.size());
  401. TVector<TColumnSchema> newColumnsUnsorted;
  402. for (auto& column : Columns_) {
  403. auto it = sortColumnIndex.find(column.Name());
  404. if (it == sortColumnIndex.end()) {
  405. column.ResetSortOrder();
  406. newColumnsUnsorted.push_back(std::move(column));
  407. } else {
  408. auto index = it->second;
  409. const auto& sortColumn = sortColumns.Parts_[index];
  410. column.SortOrder(sortColumn.SortOrder());
  411. newColumnsSorted[index] = std::move(column);
  412. sortColumnIndex.erase(it);
  413. }
  414. }
  415. Y_ENSURE(sortColumnIndex.empty(), "Column name '" << sortColumnIndex.begin()->first
  416. << "' not found in table schema");
  417. newColumnsSorted.insert(newColumnsSorted.end(), newColumnsUnsorted.begin(), newColumnsUnsorted.end());
  418. Columns_ = std::move(newColumnsSorted);
  419. return *this;
  420. }
  421. TTableSchema TTableSchema::SortBy(const TSortColumns& sortColumns) &&
  422. {
  423. return std::move(SortBy(sortColumns));
  424. }
  425. TVector<TColumnSchema>& TTableSchema::MutableColumns()
  426. {
  427. return Columns_;
  428. }
  429. TNode TTableSchema::ToNode() const
  430. {
  431. TNode result;
  432. TNodeBuilder builder(&result);
  433. Serialize(*this, &builder);
  434. return result;
  435. }
  436. TTableSchema TTableSchema::FromNode(const TNode& node)
  437. {
  438. TTableSchema schema;
  439. Deserialize(schema, node);
  440. return schema;
  441. }
  442. bool operator==(const TTableSchema& lhs, const TTableSchema& rhs)
  443. {
  444. return
  445. lhs.Columns() == rhs.Columns() &&
  446. lhs.Strict() == rhs.Strict() &&
  447. lhs.UniqueKeys() == rhs.UniqueKeys();
  448. }
  449. void PrintTo(const TTableSchema& schema, std::ostream* out)
  450. {
  451. (*out) << NodeToYsonString(schema.ToNode(), NYson::EYsonFormat::Pretty);
  452. }
  453. ////////////////////////////////////////////////////////////////////////////////
  454. TKeyBound::TKeyBound(ERelation relation, TKey key)
  455. : Relation_(relation)
  456. , Key_(std::move(key))
  457. { }
  458. ////////////////////////////////////////////////////////////////////////////////
  459. TTableSchema CreateTableSchema(
  460. const Descriptor& messageDescriptor,
  461. const TSortColumns& sortColumns,
  462. bool keepFieldsWithoutExtension)
  463. {
  464. auto result = CreateTableSchema(messageDescriptor, keepFieldsWithoutExtension);
  465. if (!sortColumns.Parts_.empty()) {
  466. result.SortBy(sortColumns.Parts_);
  467. }
  468. return result;
  469. }
  470. TTableSchema CreateTableSchema(NTi::TTypePtr type)
  471. {
  472. Y_ABORT_UNLESS(type);
  473. TTableSchema schema;
  474. Deserialize(schema, NodeFromYsonString(NTi::NIo::AsYtSchema(type.Get())));
  475. return schema;
  476. }
  477. ////////////////////////////////////////////////////////////////////////////////
  478. bool IsTrivial(const TReadLimit& readLimit)
  479. {
  480. return !readLimit.Key_ && !readLimit.RowIndex_ && !readLimit.Offset_ && !readLimit.TabletIndex_ && !readLimit.KeyBound_;
  481. }
  482. EValueType NodeTypeToValueType(TNode::EType nodeType)
  483. {
  484. switch (nodeType) {
  485. case TNode::EType::Int64: return VT_INT64;
  486. case TNode::EType::Uint64: return VT_UINT64;
  487. case TNode::EType::String: return VT_STRING;
  488. case TNode::EType::Double: return VT_DOUBLE;
  489. case TNode::EType::Bool: return VT_BOOLEAN;
  490. default:
  491. ythrow yexception() << "Cannot convert TNode type " << nodeType << " to EValueType";
  492. }
  493. }
  494. ////////////////////////////////////////////////////////////////////////////////
  495. const TVector<TReadRange>& GetRangesCompat(const TRichYPath& path)
  496. {
  497. static const TVector<TReadRange> empty;
  498. const auto& maybeRanges = path.GetRanges();
  499. if (maybeRanges.Empty()) {
  500. return empty;
  501. } else if (maybeRanges->size() > 0) {
  502. return *maybeRanges;
  503. } else {
  504. // If you see this exception, that means that caller of this function doesn't known what to do
  505. // with RichYPath that has set range list, but the range list is empty.
  506. //
  507. // To avoid this exception caller must explicitly handle such case.
  508. // NB. YT-17683
  509. ythrow TApiUsageError() << "Unsupported RichYPath: explicitly empty range list";
  510. }
  511. }
  512. ////////////////////////////////////////////////////////////////////////////////
  513. namespace NDetail {
  514. ////////////////////////////////////////////////////////////////////////////////
  515. TString ToString(EValueType type)
  516. {
  517. switch (type) {
  518. case VT_INT8:
  519. return "int8";
  520. case VT_INT16:
  521. return "int16";
  522. case VT_INT32:
  523. return "int32";
  524. case VT_INT64:
  525. return "int64";
  526. case VT_UINT8:
  527. return "uint8";
  528. case VT_UINT16:
  529. return "uint16";
  530. case VT_UINT32:
  531. return "uint32";
  532. case VT_UINT64:
  533. return "uint64";
  534. case VT_DOUBLE:
  535. return "double";
  536. case VT_BOOLEAN:
  537. return "boolean";
  538. case VT_STRING:
  539. return "string";
  540. case VT_UTF8:
  541. return "utf8";
  542. case VT_ANY:
  543. return "any";
  544. case VT_NULL:
  545. return "null";
  546. case VT_VOID:
  547. return "void";
  548. case VT_DATE:
  549. return "date";
  550. case VT_DATETIME:
  551. return "datetime";
  552. case VT_TIMESTAMP:
  553. return "timestamp";
  554. case VT_INTERVAL:
  555. return "interval";
  556. case VT_FLOAT:
  557. return "float";
  558. case VT_JSON:
  559. return "json";
  560. case VT_DATE32:
  561. return "date32";
  562. case VT_DATETIME64:
  563. return "datetime64";
  564. case VT_TIMESTAMP64:
  565. return "timestamp64";
  566. case VT_INTERVAL64:
  567. return "interval64";
  568. case VT_UUID:
  569. return "uuid";
  570. }
  571. ythrow yexception() << "Invalid value type " << static_cast<int>(type);
  572. }
  573. ////////////////////////////////////////////////////////////////////////////////
  574. } // namespace NDetail
  575. } // namespace NYT
  576. template <>
  577. void Out<NYT::TSortColumn>(IOutputStream& os, const NYT::TSortColumn& sortColumn)
  578. {
  579. if (sortColumn.SortOrder() == NYT::ESortOrder::SO_ASCENDING) {
  580. os << sortColumn.Name();
  581. } else {
  582. os << NYT::BuildYsonStringFluently(NYson::EYsonFormat::Text).Value(sortColumn);
  583. }
  584. }