common.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680
  1. #include "common.h"
  2. #include "errors.h"
  3. #include "format.h"
  4. #include "serialize.h"
  5. #include "fluent.h"
  6. #include <yt/yt_proto/yt/formats/extension.pb.h>
  7. #include <library/cpp/yson/node/node_builder.h>
  8. #include <library/cpp/yson/node/node_io.h>
  9. #include <library/cpp/type_info/type.h>
  10. #include <util/generic/xrange.h>
  11. namespace NYT {
  12. using ::google::protobuf::Descriptor;
  13. ////////////////////////////////////////////////////////////////////////////////
  14. TSortColumn::TSortColumn(TStringBuf name, ESortOrder sortOrder)
  15. : Name_(name)
  16. , SortOrder_(sortOrder)
  17. { }
  18. TSortColumn::TSortColumn(const TString& name, ESortOrder sortOrder)
  19. : TSortColumn(static_cast<TStringBuf>(name), sortOrder)
  20. { }
  21. TSortColumn::TSortColumn(const char* name, ESortOrder sortOrder)
  22. : TSortColumn(static_cast<TStringBuf>(name), sortOrder)
  23. { }
  24. const TSortColumn& TSortColumn::EnsureAscending() const
  25. {
  26. Y_ENSURE(SortOrder() == ESortOrder::SO_ASCENDING);
  27. return *this;
  28. }
  29. TNode TSortColumn::ToNode() const
  30. {
  31. return BuildYsonNodeFluently().Value(*this);
  32. }
  33. ////////////////////////////////////////////////////////////////////////////////
  34. // Below lie backward compatibility methods.
  35. ////////////////////////////////////////////////////////////////////////////////
  36. TSortColumn& TSortColumn::operator = (TStringBuf name)
  37. {
  38. EnsureAscending();
  39. Name_ = name;
  40. return *this;
  41. }
  42. TSortColumn& TSortColumn::operator = (const TString& name)
  43. {
  44. return (*this = static_cast<TStringBuf>(name));
  45. }
  46. TSortColumn& TSortColumn::operator = (const char* name)
  47. {
  48. return (*this = static_cast<TStringBuf>(name));
  49. }
  50. bool TSortColumn::operator == (TStringBuf rhsName) const
  51. {
  52. EnsureAscending();
  53. return Name_ == rhsName;
  54. }
  55. bool TSortColumn::operator == (const TString& rhsName) const
  56. {
  57. return *this == static_cast<TStringBuf>(rhsName);
  58. }
  59. bool TSortColumn::operator == (const char* rhsName) const
  60. {
  61. return *this == static_cast<TStringBuf>(rhsName);
  62. }
  63. TSortColumn::operator TStringBuf() const
  64. {
  65. EnsureAscending();
  66. return Name_;
  67. }
  68. TSortColumn::operator TString() const
  69. {
  70. return TString(static_cast<TStringBuf>(*this));
  71. }
  72. TSortColumn::operator std::string() const
  73. {
  74. EnsureAscending();
  75. return static_cast<std::string>(Name_);
  76. }
  77. ////////////////////////////////////////////////////////////////////////////////
  78. TSortColumns::TSortColumns()
  79. { }
  80. TSortColumns::TSortColumns(const TVector<TString>& names)
  81. {
  82. Parts_.assign(names.begin(), names.end());
  83. }
  84. TSortColumns::TSortColumns(const TColumnNames& names)
  85. : TSortColumns(names.Parts_)
  86. { }
  87. TSortColumns::operator TColumnNames() const
  88. {
  89. return TColumnNames(EnsureAscending().GetNames());
  90. }
  91. const TSortColumns& TSortColumns::EnsureAscending() const
  92. {
  93. for (const auto& sortColumn : Parts_) {
  94. sortColumn.EnsureAscending();
  95. }
  96. return *this;
  97. }
  98. TVector<TString> TSortColumns::GetNames() const
  99. {
  100. TVector<TString> names;
  101. names.reserve(Parts_.size());
  102. for (const auto& sortColumn : Parts_) {
  103. names.push_back(sortColumn.Name());
  104. }
  105. return names;
  106. }
  107. ////////////////////////////////////////////////////////////////////////////////
  108. static NTi::TTypePtr OldTypeToTypeV3(EValueType type)
  109. {
  110. switch (type) {
  111. case VT_INT64:
  112. return NTi::Int64();
  113. case VT_UINT64:
  114. return NTi::Uint64();
  115. case VT_DOUBLE:
  116. return NTi::Double();
  117. case VT_BOOLEAN:
  118. return NTi::Bool();
  119. case VT_STRING:
  120. return NTi::String();
  121. case VT_ANY:
  122. return NTi::Yson();
  123. case VT_INT8:
  124. return NTi::Int8();
  125. case VT_INT16:
  126. return NTi::Int16();
  127. case VT_INT32:
  128. return NTi::Int32();
  129. case VT_UINT8:
  130. return NTi::Uint8();
  131. case VT_UINT16:
  132. return NTi::Uint16();
  133. case VT_UINT32:
  134. return NTi::Uint32();
  135. case VT_UTF8:
  136. return NTi::Utf8();
  137. case VT_NULL:
  138. return NTi::Null();
  139. case VT_VOID:
  140. return NTi::Void();
  141. case VT_DATE:
  142. return NTi::Date();
  143. case VT_DATETIME:
  144. return NTi::Datetime();
  145. case VT_TIMESTAMP:
  146. return NTi::Timestamp();
  147. case VT_INTERVAL:
  148. return NTi::Interval();
  149. case VT_FLOAT:
  150. return NTi::Float();
  151. case VT_JSON:
  152. return NTi::Json();
  153. case VT_DATE32:
  154. return NTi::Date32();
  155. case VT_DATETIME64:
  156. return NTi::Datetime64();
  157. case VT_TIMESTAMP64:
  158. return NTi::Timestamp64();
  159. case VT_INTERVAL64:
  160. return NTi::Interval64();
  161. }
  162. }
  163. static std::pair<EValueType, bool> Simplify(const NTi::TTypePtr& type)
  164. {
  165. using namespace NTi;
  166. const auto typeName = type->GetTypeName();
  167. switch (typeName) {
  168. case ETypeName::Bool:
  169. return {VT_BOOLEAN, true};
  170. case ETypeName::Int8:
  171. return {VT_INT8, true};
  172. case ETypeName::Int16:
  173. return {VT_INT16, true};
  174. case ETypeName::Int32:
  175. return {VT_INT32, true};
  176. case ETypeName::Int64:
  177. return {VT_INT64, true};
  178. case ETypeName::Uint8:
  179. return {VT_UINT8, true};
  180. case ETypeName::Uint16:
  181. return {VT_UINT16, true};
  182. case ETypeName::Uint32:
  183. return {VT_UINT32, true};
  184. case ETypeName::Uint64:
  185. return {VT_UINT64, true};
  186. case ETypeName::Float:
  187. return {VT_FLOAT, true};
  188. case ETypeName::Double:
  189. return {VT_DOUBLE, true};
  190. case ETypeName::String:
  191. return {VT_STRING, true};
  192. case ETypeName::Utf8:
  193. return {VT_UTF8, true};
  194. case ETypeName::Date:
  195. return {VT_DATE, true};
  196. case ETypeName::Datetime:
  197. return {VT_DATETIME, true};
  198. case ETypeName::Timestamp:
  199. return {VT_TIMESTAMP, true};
  200. case ETypeName::Interval:
  201. return {VT_INTERVAL, true};
  202. case ETypeName::TzDate:
  203. case ETypeName::TzDatetime:
  204. case ETypeName::TzTimestamp:
  205. break;
  206. case ETypeName::Json:
  207. return {VT_JSON, true};
  208. case ETypeName::Decimal:
  209. return {VT_STRING, true};
  210. case ETypeName::Uuid:
  211. break;
  212. case ETypeName::Yson:
  213. return {VT_ANY, true};
  214. case ETypeName::Date32:
  215. return {VT_DATE32, true};
  216. case ETypeName::Datetime64:
  217. return {VT_DATETIME64, true};
  218. case ETypeName::Timestamp64:
  219. return {VT_TIMESTAMP64, true};
  220. case ETypeName::Interval64:
  221. return {VT_INTERVAL64, true};
  222. case ETypeName::Void:
  223. return {VT_VOID, false};
  224. case ETypeName::Null:
  225. return {VT_NULL, false};
  226. case ETypeName::Optional:
  227. {
  228. auto itemType = type->AsOptional()->GetItemType();
  229. if (itemType->IsPrimitive()) {
  230. auto simplified = Simplify(itemType->AsPrimitive());
  231. if (simplified.second) {
  232. simplified.second = false;
  233. return simplified;
  234. }
  235. }
  236. return {VT_ANY, false};
  237. }
  238. case ETypeName::List:
  239. return {VT_ANY, true};
  240. case ETypeName::Dict:
  241. return {VT_ANY, true};
  242. case ETypeName::Struct:
  243. return {VT_ANY, true};
  244. case ETypeName::Tuple:
  245. return {VT_ANY, true};
  246. case ETypeName::Variant:
  247. return {VT_ANY, true};
  248. case ETypeName::Tagged:
  249. return Simplify(type->AsTagged()->GetItemType());
  250. }
  251. ythrow TApiUsageError() << "Unsupported type: " << typeName;
  252. }
  253. NTi::TTypePtr ToTypeV3(EValueType type, bool required)
  254. {
  255. auto typeV3 = OldTypeToTypeV3(type);
  256. if (!Simplify(typeV3).second) {
  257. if (required) {
  258. ythrow TApiUsageError() << "type: " << type << " cannot be required";
  259. } else {
  260. return typeV3;
  261. }
  262. }
  263. if (required) {
  264. return typeV3;
  265. } else {
  266. return NTi::Optional(typeV3);
  267. }
  268. }
  269. TColumnSchema::TColumnSchema()
  270. : TypeV3_(NTi::Optional(NTi::Int64()))
  271. { }
  272. EValueType TColumnSchema::Type() const
  273. {
  274. return Simplify(TypeV3_).first;
  275. }
  276. TColumnSchema& TColumnSchema::Type(EValueType type) &
  277. {
  278. return Type(ToTypeV3(type, false));
  279. }
  280. TColumnSchema TColumnSchema::Type(EValueType type) &&
  281. {
  282. return Type(ToTypeV3(type, false));
  283. }
  284. TColumnSchema& TColumnSchema::Type(const NTi::TTypePtr& type) &
  285. {
  286. Y_ABORT_UNLESS(type.Get(), "Cannot create column schema with nullptr type");
  287. TypeV3_ = type;
  288. return *this;
  289. }
  290. TColumnSchema TColumnSchema::Type(const NTi::TTypePtr& type) &&
  291. {
  292. Y_ABORT_UNLESS(type.Get(), "Cannot create column schema with nullptr type");
  293. TypeV3_ = type;
  294. return *this;
  295. }
  296. TColumnSchema& TColumnSchema::TypeV3(const NTi::TTypePtr& type) &
  297. {
  298. return Type(type);
  299. }
  300. TColumnSchema TColumnSchema::TypeV3(const NTi::TTypePtr& type) &&
  301. {
  302. return Type(type);
  303. }
  304. NTi::TTypePtr TColumnSchema::TypeV3() const
  305. {
  306. return TypeV3_;
  307. }
  308. bool TColumnSchema::Required() const
  309. {
  310. return Simplify(TypeV3_).second;
  311. }
  312. TColumnSchema& TColumnSchema::Type(EValueType type, bool required) &
  313. {
  314. return Type(ToTypeV3(type, required));
  315. }
  316. TColumnSchema TColumnSchema::Type(EValueType type, bool required) &&
  317. {
  318. return Type(ToTypeV3(type, required));
  319. }
  320. bool operator==(const TColumnSchema& lhs, const TColumnSchema& rhs)
  321. {
  322. return
  323. lhs.Name() == rhs.Name() &&
  324. NTi::NEq::TStrictlyEqual()(lhs.TypeV3(), rhs.TypeV3()) &&
  325. lhs.SortOrder() == rhs.SortOrder() &&
  326. lhs.Lock() == rhs.Lock() &&
  327. lhs.Expression() == rhs.Expression() &&
  328. lhs.Aggregate() == rhs.Aggregate() &&
  329. lhs.Group() == rhs.Group();
  330. }
  331. ////////////////////////////////////////////////////////////////////////////////
  332. bool TTableSchema::Empty() const
  333. {
  334. return Columns_.empty();
  335. }
  336. TTableSchema& TTableSchema::AddColumn(const TString& name, EValueType type) &
  337. {
  338. Columns_.push_back(TColumnSchema().Name(name).Type(type));
  339. return *this;
  340. }
  341. TTableSchema TTableSchema::AddColumn(const TString& name, EValueType type) &&
  342. {
  343. return std::move(AddColumn(name, type));
  344. }
  345. TTableSchema& TTableSchema::AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) &
  346. {
  347. Columns_.push_back(TColumnSchema().Name(name).Type(type).SortOrder(sortOrder));
  348. return *this;
  349. }
  350. TTableSchema TTableSchema::AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) &&
  351. {
  352. return std::move(AddColumn(name, type, sortOrder));
  353. }
  354. TTableSchema& TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type) &
  355. {
  356. Columns_.push_back(TColumnSchema().Name(name).Type(type));
  357. return *this;
  358. }
  359. TTableSchema TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type) &&
  360. {
  361. return std::move(AddColumn(name, type));
  362. }
  363. TTableSchema& TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) &
  364. {
  365. Columns_.push_back(TColumnSchema().Name(name).Type(type).SortOrder(sortOrder));
  366. return *this;
  367. }
  368. TTableSchema TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) &&
  369. {
  370. return std::move(AddColumn(name, type, sortOrder));
  371. }
  372. TTableSchema& TTableSchema::SortBy(const TSortColumns& sortColumns) &
  373. {
  374. Y_ENSURE(sortColumns.Parts_.size() <= Columns_.size());
  375. THashMap<TString, ui64> sortColumnIndex;
  376. for (auto i: xrange(sortColumns.Parts_.size())) {
  377. Y_ENSURE(sortColumnIndex.emplace(sortColumns.Parts_[i].Name(), i).second,
  378. "Key column name '" << sortColumns.Parts_[i].Name() << "' repeats in columns list");
  379. }
  380. TVector<TColumnSchema> newColumnsSorted(sortColumns.Parts_.size());
  381. TVector<TColumnSchema> newColumnsUnsorted;
  382. for (auto& column : Columns_) {
  383. auto it = sortColumnIndex.find(column.Name());
  384. if (it == sortColumnIndex.end()) {
  385. column.ResetSortOrder();
  386. newColumnsUnsorted.push_back(std::move(column));
  387. } else {
  388. auto index = it->second;
  389. const auto& sortColumn = sortColumns.Parts_[index];
  390. column.SortOrder(sortColumn.SortOrder());
  391. newColumnsSorted[index] = std::move(column);
  392. sortColumnIndex.erase(it);
  393. }
  394. }
  395. Y_ENSURE(sortColumnIndex.empty(), "Column name '" << sortColumnIndex.begin()->first
  396. << "' not found in table schema");
  397. newColumnsSorted.insert(newColumnsSorted.end(), newColumnsUnsorted.begin(), newColumnsUnsorted.end());
  398. Columns_ = std::move(newColumnsSorted);
  399. return *this;
  400. }
  401. TTableSchema TTableSchema::SortBy(const TSortColumns& sortColumns) &&
  402. {
  403. return std::move(SortBy(sortColumns));
  404. }
  405. TVector<TColumnSchema>& TTableSchema::MutableColumns()
  406. {
  407. return Columns_;
  408. }
  409. TNode TTableSchema::ToNode() const
  410. {
  411. TNode result;
  412. TNodeBuilder builder(&result);
  413. Serialize(*this, &builder);
  414. return result;
  415. }
  416. TTableSchema TTableSchema::FromNode(const TNode& node)
  417. {
  418. TTableSchema schema;
  419. Deserialize(schema, node);
  420. return schema;
  421. }
  422. bool operator==(const TTableSchema& lhs, const TTableSchema& rhs)
  423. {
  424. return
  425. lhs.Columns() == rhs.Columns() &&
  426. lhs.Strict() == rhs.Strict() &&
  427. lhs.UniqueKeys() == rhs.UniqueKeys();
  428. }
  429. void PrintTo(const TTableSchema& schema, std::ostream* out)
  430. {
  431. (*out) << NodeToYsonString(schema.ToNode(), NYson::EYsonFormat::Pretty);
  432. }
  433. ////////////////////////////////////////////////////////////////////////////////
  434. TKeyBound::TKeyBound(ERelation relation, TKey key)
  435. : Relation_(relation)
  436. , Key_(std::move(key))
  437. { }
  438. ////////////////////////////////////////////////////////////////////////////////
  439. TTableSchema CreateTableSchema(
  440. const Descriptor& messageDescriptor,
  441. const TSortColumns& sortColumns,
  442. bool keepFieldsWithoutExtension)
  443. {
  444. auto result = CreateTableSchema(messageDescriptor, keepFieldsWithoutExtension);
  445. if (!sortColumns.Parts_.empty()) {
  446. result.SortBy(sortColumns.Parts_);
  447. }
  448. return result;
  449. }
  450. TTableSchema CreateTableSchema(NTi::TTypePtr type)
  451. {
  452. Y_ABORT_UNLESS(type);
  453. TTableSchema schema;
  454. Deserialize(schema, NodeFromYsonString(NTi::NIo::AsYtSchema(type.Get())));
  455. return schema;
  456. }
  457. ////////////////////////////////////////////////////////////////////////////////
  458. bool IsTrivial(const TReadLimit& readLimit)
  459. {
  460. return !readLimit.Key_ && !readLimit.RowIndex_ && !readLimit.Offset_ && !readLimit.TabletIndex_ && !readLimit.KeyBound_;
  461. }
  462. EValueType NodeTypeToValueType(TNode::EType nodeType)
  463. {
  464. switch (nodeType) {
  465. case TNode::EType::Int64: return VT_INT64;
  466. case TNode::EType::Uint64: return VT_UINT64;
  467. case TNode::EType::String: return VT_STRING;
  468. case TNode::EType::Double: return VT_DOUBLE;
  469. case TNode::EType::Bool: return VT_BOOLEAN;
  470. default:
  471. ythrow yexception() << "Cannot convert TNode type " << nodeType << " to EValueType";
  472. }
  473. }
  474. ////////////////////////////////////////////////////////////////////////////////
  475. const TVector<TReadRange>& GetRangesCompat(const TRichYPath& path)
  476. {
  477. static const TVector<TReadRange> empty;
  478. const auto& maybeRanges = path.GetRanges();
  479. if (maybeRanges.Empty()) {
  480. return empty;
  481. } else if (maybeRanges->size() > 0) {
  482. return *maybeRanges;
  483. } else {
  484. // If you see this exception, that means that caller of this function doesn't known what to do
  485. // with RichYPath that has set range list, but the range list is empty.
  486. //
  487. // To avoid this exception caller must explicitly handle such case.
  488. // NB. YT-17683
  489. ythrow TApiUsageError() << "Unsupported RichYPath: explicitly empty range list";
  490. }
  491. }
  492. ////////////////////////////////////////////////////////////////////////////////
  493. namespace NDetail {
  494. ////////////////////////////////////////////////////////////////////////////////
  495. TString ToString(EValueType type)
  496. {
  497. switch (type) {
  498. case VT_INT8:
  499. return "int8";
  500. case VT_INT16:
  501. return "int16";
  502. case VT_INT32:
  503. return "int32";
  504. case VT_INT64:
  505. return "int64";
  506. case VT_UINT8:
  507. return "uint8";
  508. case VT_UINT16:
  509. return "uint16";
  510. case VT_UINT32:
  511. return "uint32";
  512. case VT_UINT64:
  513. return "uint64";
  514. case VT_DOUBLE:
  515. return "double";
  516. case VT_BOOLEAN:
  517. return "boolean";
  518. case VT_STRING:
  519. return "string";
  520. case VT_UTF8:
  521. return "utf8";
  522. case VT_ANY:
  523. return "any";
  524. case VT_NULL:
  525. return "null";
  526. case VT_VOID:
  527. return "void";
  528. case VT_DATE:
  529. return "date";
  530. case VT_DATETIME:
  531. return "datetime";
  532. case VT_TIMESTAMP:
  533. return "timestamp";
  534. case VT_INTERVAL:
  535. return "interval";
  536. case VT_FLOAT:
  537. return "float";
  538. case VT_JSON:
  539. return "json";
  540. case VT_DATE32:
  541. return "date32";
  542. case VT_DATETIME64:
  543. return "datetime64";
  544. case VT_TIMESTAMP64:
  545. return "timestamp64";
  546. case VT_INTERVAL64:
  547. return "interval64";
  548. }
  549. ythrow yexception() << "Invalid value type " << static_cast<int>(type);
  550. }
  551. ////////////////////////////////////////////////////////////////////////////////
  552. } // namespace NDetail
  553. } // namespace NYT
  554. template <>
  555. void Out<NYT::TSortColumn>(IOutputStream& os, const NYT::TSortColumn& sortColumn)
  556. {
  557. if (sortColumn.SortOrder() == NYT::ESortOrder::SO_ASCENDING) {
  558. os << sortColumn.Name();
  559. } else {
  560. os << NYT::BuildYsonStringFluently(NYson::EYsonFormat::Text).Value(sortColumn);
  561. }
  562. }