common.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686
  1. #include "common.h"
  2. #include "errors.h"
  3. #include "format.h"
  4. #include "serialize.h"
  5. #include "fluent.h"
  6. #include <yt/yt_proto/yt/formats/extension.pb.h>
  7. #include <library/cpp/yson/node/node_builder.h>
  8. #include <library/cpp/yson/node/node_io.h>
  9. #include <library/cpp/type_info/type.h>
  10. #include <util/generic/xrange.h>
  11. namespace NYT {
  12. using ::google::protobuf::Descriptor;
  13. ////////////////////////////////////////////////////////////////////////////////
  14. TSortColumn::TSortColumn(TStringBuf name, ESortOrder sortOrder)
  15. : Name_(name)
  16. , SortOrder_(sortOrder)
  17. { }
  18. TSortColumn::TSortColumn(const TString& name, ESortOrder sortOrder)
  19. : TSortColumn(static_cast<TStringBuf>(name), sortOrder)
  20. { }
  21. TSortColumn::TSortColumn(const char* name, ESortOrder sortOrder)
  22. : TSortColumn(static_cast<TStringBuf>(name), sortOrder)
  23. { }
  24. const TSortColumn& TSortColumn::EnsureAscending() const
  25. {
  26. Y_ENSURE(SortOrder() == ESortOrder::SO_ASCENDING);
  27. return *this;
  28. }
  29. TNode TSortColumn::ToNode() const
  30. {
  31. return BuildYsonNodeFluently().Value(*this);
  32. }
  33. ////////////////////////////////////////////////////////////////////////////////
  34. // Below lie backward compatibility methods.
  35. ////////////////////////////////////////////////////////////////////////////////
  36. TSortColumn& TSortColumn::operator = (TStringBuf name)
  37. {
  38. EnsureAscending();
  39. Name_ = name;
  40. return *this;
  41. }
  42. TSortColumn& TSortColumn::operator = (const TString& name)
  43. {
  44. return (*this = static_cast<TStringBuf>(name));
  45. }
  46. TSortColumn& TSortColumn::operator = (const char* name)
  47. {
  48. return (*this = static_cast<TStringBuf>(name));
  49. }
  50. bool TSortColumn::operator == (TStringBuf rhsName) const
  51. {
  52. EnsureAscending();
  53. return Name_ == rhsName;
  54. }
  55. bool TSortColumn::operator == (const TString& rhsName) const
  56. {
  57. return *this == static_cast<TStringBuf>(rhsName);
  58. }
  59. bool TSortColumn::operator == (const char* rhsName) const
  60. {
  61. return *this == static_cast<TStringBuf>(rhsName);
  62. }
  63. TSortColumn::operator TStringBuf() const
  64. {
  65. EnsureAscending();
  66. return Name_;
  67. }
  68. TSortColumn::operator TString() const
  69. {
  70. return TString(static_cast<TStringBuf>(*this));
  71. }
  72. TSortColumn::operator std::string() const
  73. {
  74. EnsureAscending();
  75. return static_cast<std::string>(Name_);
  76. }
  77. ////////////////////////////////////////////////////////////////////////////////
  78. TSortColumns::TSortColumns()
  79. { }
  80. TSortColumns::TSortColumns(const TVector<TString>& names)
  81. {
  82. Parts_.assign(names.begin(), names.end());
  83. }
  84. TSortColumns::TSortColumns(const TColumnNames& names)
  85. : TSortColumns(names.Parts_)
  86. { }
  87. TSortColumns::operator TColumnNames() const
  88. {
  89. return TColumnNames(EnsureAscending().GetNames());
  90. }
  91. const TSortColumns& TSortColumns::EnsureAscending() const
  92. {
  93. for (const auto& sortColumn : Parts_) {
  94. sortColumn.EnsureAscending();
  95. }
  96. return *this;
  97. }
  98. TVector<TString> TSortColumns::GetNames() const
  99. {
  100. TVector<TString> names;
  101. names.reserve(Parts_.size());
  102. for (const auto& sortColumn : Parts_) {
  103. names.push_back(sortColumn.Name());
  104. }
  105. return names;
  106. }
  107. ////////////////////////////////////////////////////////////////////////////////
  108. static NTi::TTypePtr OldTypeToTypeV3(EValueType type)
  109. {
  110. switch (type) {
  111. case VT_INT64:
  112. return NTi::Int64();
  113. case VT_UINT64:
  114. return NTi::Uint64();
  115. case VT_DOUBLE:
  116. return NTi::Double();
  117. case VT_BOOLEAN:
  118. return NTi::Bool();
  119. case VT_STRING:
  120. return NTi::String();
  121. case VT_ANY:
  122. return NTi::Yson();
  123. case VT_INT8:
  124. return NTi::Int8();
  125. case VT_INT16:
  126. return NTi::Int16();
  127. case VT_INT32:
  128. return NTi::Int32();
  129. case VT_UINT8:
  130. return NTi::Uint8();
  131. case VT_UINT16:
  132. return NTi::Uint16();
  133. case VT_UINT32:
  134. return NTi::Uint32();
  135. case VT_UTF8:
  136. return NTi::Utf8();
  137. case VT_NULL:
  138. return NTi::Null();
  139. case VT_VOID:
  140. return NTi::Void();
  141. case VT_DATE:
  142. return NTi::Date();
  143. case VT_DATETIME:
  144. return NTi::Datetime();
  145. case VT_TIMESTAMP:
  146. return NTi::Timestamp();
  147. case VT_INTERVAL:
  148. return NTi::Interval();
  149. case VT_FLOAT:
  150. return NTi::Float();
  151. case VT_JSON:
  152. return NTi::Json();
  153. case VT_DATE32:
  154. return NTi::Date32();
  155. case VT_DATETIME64:
  156. return NTi::Datetime64();
  157. case VT_TIMESTAMP64:
  158. return NTi::Timestamp64();
  159. case VT_INTERVAL64:
  160. return NTi::Interval64();
  161. case VT_UUID:
  162. return NTi::Uuid();
  163. }
  164. }
  165. static std::pair<EValueType, bool> Simplify(const NTi::TTypePtr& type)
  166. {
  167. using namespace NTi;
  168. const auto typeName = type->GetTypeName();
  169. switch (typeName) {
  170. case ETypeName::Bool:
  171. return {VT_BOOLEAN, true};
  172. case ETypeName::Int8:
  173. return {VT_INT8, true};
  174. case ETypeName::Int16:
  175. return {VT_INT16, true};
  176. case ETypeName::Int32:
  177. return {VT_INT32, true};
  178. case ETypeName::Int64:
  179. return {VT_INT64, true};
  180. case ETypeName::Uint8:
  181. return {VT_UINT8, true};
  182. case ETypeName::Uint16:
  183. return {VT_UINT16, true};
  184. case ETypeName::Uint32:
  185. return {VT_UINT32, true};
  186. case ETypeName::Uint64:
  187. return {VT_UINT64, true};
  188. case ETypeName::Float:
  189. return {VT_FLOAT, true};
  190. case ETypeName::Double:
  191. return {VT_DOUBLE, true};
  192. case ETypeName::String:
  193. return {VT_STRING, true};
  194. case ETypeName::Utf8:
  195. return {VT_UTF8, true};
  196. case ETypeName::Date:
  197. return {VT_DATE, true};
  198. case ETypeName::Datetime:
  199. return {VT_DATETIME, true};
  200. case ETypeName::Timestamp:
  201. return {VT_TIMESTAMP, true};
  202. case ETypeName::Interval:
  203. return {VT_INTERVAL, true};
  204. case ETypeName::TzDate:
  205. case ETypeName::TzDatetime:
  206. case ETypeName::TzTimestamp:
  207. break;
  208. case ETypeName::Json:
  209. return {VT_JSON, true};
  210. case ETypeName::Decimal:
  211. return {VT_STRING, true};
  212. case ETypeName::Uuid:
  213. return {VT_UUID, true};
  214. case ETypeName::Yson:
  215. return {VT_ANY, true};
  216. case ETypeName::Date32:
  217. return {VT_DATE32, true};
  218. case ETypeName::Datetime64:
  219. return {VT_DATETIME64, true};
  220. case ETypeName::Timestamp64:
  221. return {VT_TIMESTAMP64, true};
  222. case ETypeName::Interval64:
  223. return {VT_INTERVAL64, true};
  224. case ETypeName::Void:
  225. return {VT_VOID, false};
  226. case ETypeName::Null:
  227. return {VT_NULL, false};
  228. case ETypeName::Optional:
  229. {
  230. auto itemType = type->AsOptional()->GetItemType();
  231. if (itemType->IsPrimitive()) {
  232. auto simplified = Simplify(itemType->AsPrimitive());
  233. if (simplified.second) {
  234. simplified.second = false;
  235. return simplified;
  236. }
  237. }
  238. return {VT_ANY, false};
  239. }
  240. case ETypeName::List:
  241. return {VT_ANY, true};
  242. case ETypeName::Dict:
  243. return {VT_ANY, true};
  244. case ETypeName::Struct:
  245. return {VT_ANY, true};
  246. case ETypeName::Tuple:
  247. return {VT_ANY, true};
  248. case ETypeName::Variant:
  249. return {VT_ANY, true};
  250. case ETypeName::Tagged:
  251. return Simplify(type->AsTagged()->GetItemType());
  252. }
  253. ythrow TApiUsageError() << "Unsupported type: " << typeName;
  254. }
  255. NTi::TTypePtr ToTypeV3(EValueType type, bool required)
  256. {
  257. auto typeV3 = OldTypeToTypeV3(type);
  258. if (!Simplify(typeV3).second) {
  259. if (required) {
  260. ythrow TApiUsageError() << "type: " << type << " cannot be required";
  261. } else {
  262. return typeV3;
  263. }
  264. }
  265. if (required) {
  266. return typeV3;
  267. } else {
  268. return NTi::Optional(typeV3);
  269. }
  270. }
  271. TColumnSchema::TColumnSchema()
  272. : TypeV3_(NTi::Optional(NTi::Int64()))
  273. { }
  274. EValueType TColumnSchema::Type() const
  275. {
  276. return Simplify(TypeV3_).first;
  277. }
  278. TColumnSchema& TColumnSchema::Type(EValueType type) &
  279. {
  280. return Type(ToTypeV3(type, false));
  281. }
  282. TColumnSchema TColumnSchema::Type(EValueType type) &&
  283. {
  284. return Type(ToTypeV3(type, false));
  285. }
  286. TColumnSchema& TColumnSchema::Type(const NTi::TTypePtr& type) &
  287. {
  288. Y_ABORT_UNLESS(type.Get(), "Cannot create column schema with nullptr type");
  289. TypeV3_ = type;
  290. return *this;
  291. }
  292. TColumnSchema TColumnSchema::Type(const NTi::TTypePtr& type) &&
  293. {
  294. Y_ABORT_UNLESS(type.Get(), "Cannot create column schema with nullptr type");
  295. TypeV3_ = type;
  296. return *this;
  297. }
  298. TColumnSchema& TColumnSchema::TypeV3(const NTi::TTypePtr& type) &
  299. {
  300. return Type(type);
  301. }
  302. TColumnSchema TColumnSchema::TypeV3(const NTi::TTypePtr& type) &&
  303. {
  304. return Type(type);
  305. }
  306. NTi::TTypePtr TColumnSchema::TypeV3() const
  307. {
  308. return TypeV3_;
  309. }
  310. bool TColumnSchema::Required() const
  311. {
  312. return Simplify(TypeV3_).second;
  313. }
  314. TColumnSchema& TColumnSchema::Type(EValueType type, bool required) &
  315. {
  316. return Type(ToTypeV3(type, required));
  317. }
  318. TColumnSchema TColumnSchema::Type(EValueType type, bool required) &&
  319. {
  320. return Type(ToTypeV3(type, required));
  321. }
  322. bool operator==(const TColumnSchema& lhs, const TColumnSchema& rhs)
  323. {
  324. return
  325. lhs.Name() == rhs.Name() &&
  326. NTi::NEq::TStrictlyEqual()(lhs.TypeV3(), rhs.TypeV3()) &&
  327. lhs.SortOrder() == rhs.SortOrder() &&
  328. lhs.Lock() == rhs.Lock() &&
  329. lhs.Expression() == rhs.Expression() &&
  330. lhs.Aggregate() == rhs.Aggregate() &&
  331. lhs.Group() == rhs.Group();
  332. }
  333. ////////////////////////////////////////////////////////////////////////////////
  334. bool TTableSchema::Empty() const
  335. {
  336. return Columns_.empty();
  337. }
  338. TTableSchema& TTableSchema::AddColumn(const TString& name, EValueType type) &
  339. {
  340. Columns_.push_back(TColumnSchema().Name(name).Type(type));
  341. return *this;
  342. }
  343. TTableSchema TTableSchema::AddColumn(const TString& name, EValueType type) &&
  344. {
  345. return std::move(AddColumn(name, type));
  346. }
  347. TTableSchema& TTableSchema::AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) &
  348. {
  349. Columns_.push_back(TColumnSchema().Name(name).Type(type).SortOrder(sortOrder));
  350. return *this;
  351. }
  352. TTableSchema TTableSchema::AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) &&
  353. {
  354. return std::move(AddColumn(name, type, sortOrder));
  355. }
  356. TTableSchema& TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type) &
  357. {
  358. Columns_.push_back(TColumnSchema().Name(name).Type(type));
  359. return *this;
  360. }
  361. TTableSchema TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type) &&
  362. {
  363. return std::move(AddColumn(name, type));
  364. }
  365. TTableSchema& TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) &
  366. {
  367. Columns_.push_back(TColumnSchema().Name(name).Type(type).SortOrder(sortOrder));
  368. return *this;
  369. }
  370. TTableSchema TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) &&
  371. {
  372. return std::move(AddColumn(name, type, sortOrder));
  373. }
  374. TTableSchema& TTableSchema::SortBy(const TSortColumns& sortColumns) &
  375. {
  376. Y_ENSURE(sortColumns.Parts_.size() <= Columns_.size());
  377. THashMap<TString, ui64> sortColumnIndex;
  378. for (auto i: xrange(sortColumns.Parts_.size())) {
  379. Y_ENSURE(sortColumnIndex.emplace(sortColumns.Parts_[i].Name(), i).second,
  380. "Key column name '" << sortColumns.Parts_[i].Name() << "' repeats in columns list");
  381. }
  382. TVector<TColumnSchema> newColumnsSorted(sortColumns.Parts_.size());
  383. TVector<TColumnSchema> newColumnsUnsorted;
  384. for (auto& column : Columns_) {
  385. auto it = sortColumnIndex.find(column.Name());
  386. if (it == sortColumnIndex.end()) {
  387. column.ResetSortOrder();
  388. newColumnsUnsorted.push_back(std::move(column));
  389. } else {
  390. auto index = it->second;
  391. const auto& sortColumn = sortColumns.Parts_[index];
  392. column.SortOrder(sortColumn.SortOrder());
  393. newColumnsSorted[index] = std::move(column);
  394. sortColumnIndex.erase(it);
  395. }
  396. }
  397. Y_ENSURE(sortColumnIndex.empty(), "Column name '" << sortColumnIndex.begin()->first
  398. << "' not found in table schema");
  399. newColumnsSorted.insert(newColumnsSorted.end(), newColumnsUnsorted.begin(), newColumnsUnsorted.end());
  400. Columns_ = std::move(newColumnsSorted);
  401. return *this;
  402. }
  403. TTableSchema TTableSchema::SortBy(const TSortColumns& sortColumns) &&
  404. {
  405. return std::move(SortBy(sortColumns));
  406. }
  407. TVector<TColumnSchema>& TTableSchema::MutableColumns()
  408. {
  409. return Columns_;
  410. }
  411. TNode TTableSchema::ToNode() const
  412. {
  413. TNode result;
  414. TNodeBuilder builder(&result);
  415. Serialize(*this, &builder);
  416. return result;
  417. }
  418. TTableSchema TTableSchema::FromNode(const TNode& node)
  419. {
  420. TTableSchema schema;
  421. Deserialize(schema, node);
  422. return schema;
  423. }
  424. bool operator==(const TTableSchema& lhs, const TTableSchema& rhs)
  425. {
  426. return
  427. lhs.Columns() == rhs.Columns() &&
  428. lhs.Strict() == rhs.Strict() &&
  429. lhs.UniqueKeys() == rhs.UniqueKeys();
  430. }
  431. void PrintTo(const TTableSchema& schema, std::ostream* out)
  432. {
  433. (*out) << NodeToYsonString(schema.ToNode(), NYson::EYsonFormat::Pretty);
  434. }
  435. ////////////////////////////////////////////////////////////////////////////////
  436. TKeyBound::TKeyBound(ERelation relation, TKey key)
  437. : Relation_(relation)
  438. , Key_(std::move(key))
  439. { }
  440. ////////////////////////////////////////////////////////////////////////////////
  441. TTableSchema CreateTableSchema(
  442. const Descriptor& messageDescriptor,
  443. const TSortColumns& sortColumns,
  444. bool keepFieldsWithoutExtension)
  445. {
  446. auto result = CreateTableSchema(messageDescriptor, keepFieldsWithoutExtension);
  447. if (!sortColumns.Parts_.empty()) {
  448. result.SortBy(sortColumns.Parts_);
  449. }
  450. return result;
  451. }
  452. TTableSchema CreateTableSchema(NTi::TTypePtr type)
  453. {
  454. Y_ABORT_UNLESS(type);
  455. TTableSchema schema;
  456. Deserialize(schema, NodeFromYsonString(NTi::NIo::AsYtSchema(type.Get())));
  457. return schema;
  458. }
  459. ////////////////////////////////////////////////////////////////////////////////
  460. bool IsTrivial(const TReadLimit& readLimit)
  461. {
  462. return !readLimit.Key_ && !readLimit.RowIndex_ && !readLimit.Offset_ && !readLimit.TabletIndex_ && !readLimit.KeyBound_;
  463. }
  464. EValueType NodeTypeToValueType(TNode::EType nodeType)
  465. {
  466. switch (nodeType) {
  467. case TNode::EType::Int64: return VT_INT64;
  468. case TNode::EType::Uint64: return VT_UINT64;
  469. case TNode::EType::String: return VT_STRING;
  470. case TNode::EType::Double: return VT_DOUBLE;
  471. case TNode::EType::Bool: return VT_BOOLEAN;
  472. default:
  473. ythrow yexception() << "Cannot convert TNode type " << nodeType << " to EValueType";
  474. }
  475. }
  476. ////////////////////////////////////////////////////////////////////////////////
  477. const TVector<TReadRange>& GetRangesCompat(const TRichYPath& path)
  478. {
  479. static const TVector<TReadRange> empty;
  480. const auto& maybeRanges = path.GetRanges();
  481. if (maybeRanges.Empty()) {
  482. return empty;
  483. } else if (maybeRanges->size() > 0) {
  484. return *maybeRanges;
  485. } else {
  486. // If you see this exception, that means that caller of this function doesn't known what to do
  487. // with RichYPath that has set range list, but the range list is empty.
  488. //
  489. // To avoid this exception caller must explicitly handle such case.
  490. // NB. YT-17683
  491. ythrow TApiUsageError() << "Unsupported RichYPath: explicitly empty range list";
  492. }
  493. }
  494. ////////////////////////////////////////////////////////////////////////////////
  495. namespace NDetail {
  496. ////////////////////////////////////////////////////////////////////////////////
  497. TString ToString(EValueType type)
  498. {
  499. switch (type) {
  500. case VT_INT8:
  501. return "int8";
  502. case VT_INT16:
  503. return "int16";
  504. case VT_INT32:
  505. return "int32";
  506. case VT_INT64:
  507. return "int64";
  508. case VT_UINT8:
  509. return "uint8";
  510. case VT_UINT16:
  511. return "uint16";
  512. case VT_UINT32:
  513. return "uint32";
  514. case VT_UINT64:
  515. return "uint64";
  516. case VT_DOUBLE:
  517. return "double";
  518. case VT_BOOLEAN:
  519. return "boolean";
  520. case VT_STRING:
  521. return "string";
  522. case VT_UTF8:
  523. return "utf8";
  524. case VT_ANY:
  525. return "any";
  526. case VT_NULL:
  527. return "null";
  528. case VT_VOID:
  529. return "void";
  530. case VT_DATE:
  531. return "date";
  532. case VT_DATETIME:
  533. return "datetime";
  534. case VT_TIMESTAMP:
  535. return "timestamp";
  536. case VT_INTERVAL:
  537. return "interval";
  538. case VT_FLOAT:
  539. return "float";
  540. case VT_JSON:
  541. return "json";
  542. case VT_DATE32:
  543. return "date32";
  544. case VT_DATETIME64:
  545. return "datetime64";
  546. case VT_TIMESTAMP64:
  547. return "timestamp64";
  548. case VT_INTERVAL64:
  549. return "interval64";
  550. case VT_UUID:
  551. return "uuid";
  552. }
  553. ythrow yexception() << "Invalid value type " << static_cast<int>(type);
  554. }
  555. ////////////////////////////////////////////////////////////////////////////////
  556. } // namespace NDetail
  557. } // namespace NYT
  558. template <>
  559. void Out<NYT::TSortColumn>(IOutputStream& os, const NYT::TSortColumn& sortColumn)
  560. {
  561. if (sortColumn.SortOrder() == NYT::ESortOrder::SO_ASCENDING) {
  562. os << sortColumn.Name();
  563. } else {
  564. os << NYT::BuildYsonStringFluently(NYson::EYsonFormat::Text).Value(sortColumn);
  565. }
  566. }