common.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653
  1. #include "common.h"
  2. #include "errors.h"
  3. #include "format.h"
  4. #include "serialize.h"
  5. #include "fluent.h"
  6. #include <yt/yt_proto/yt/formats/extension.pb.h>
  7. #include <library/cpp/yson/node/node_builder.h>
  8. #include <library/cpp/yson/node/node_io.h>
  9. #include <library/cpp/type_info/type.h>
  10. #include <util/generic/xrange.h>
  11. namespace NYT {
  12. using ::google::protobuf::Descriptor;
  13. ////////////////////////////////////////////////////////////////////////////////
  14. TSortColumn::TSortColumn(TStringBuf name, ESortOrder sortOrder)
  15. : Name_(name)
  16. , SortOrder_(sortOrder)
  17. { }
  18. TSortColumn::TSortColumn(const TString& name, ESortOrder sortOrder)
  19. : TSortColumn(static_cast<TStringBuf>(name), sortOrder)
  20. { }
  21. TSortColumn::TSortColumn(const char* name, ESortOrder sortOrder)
  22. : TSortColumn(static_cast<TStringBuf>(name), sortOrder)
  23. { }
  24. const TSortColumn& TSortColumn::EnsureAscending() const
  25. {
  26. Y_ENSURE(SortOrder() == ESortOrder::SO_ASCENDING);
  27. return *this;
  28. }
  29. TNode TSortColumn::ToNode() const
  30. {
  31. return BuildYsonNodeFluently().Value(*this);
  32. }
  33. ////////////////////////////////////////////////////////////////////////////////
  34. // Below lie backward compatibility methods.
  35. ////////////////////////////////////////////////////////////////////////////////
  36. TSortColumn& TSortColumn::operator = (TStringBuf name)
  37. {
  38. EnsureAscending();
  39. Name_ = name;
  40. return *this;
  41. }
  42. TSortColumn& TSortColumn::operator = (const TString& name)
  43. {
  44. return (*this = static_cast<TStringBuf>(name));
  45. }
  46. TSortColumn& TSortColumn::operator = (const char* name)
  47. {
  48. return (*this = static_cast<TStringBuf>(name));
  49. }
  50. bool TSortColumn::operator == (TStringBuf rhsName) const
  51. {
  52. EnsureAscending();
  53. return Name_ == rhsName;
  54. }
  55. bool TSortColumn::operator == (const TString& rhsName) const
  56. {
  57. return *this == static_cast<TStringBuf>(rhsName);
  58. }
  59. bool TSortColumn::operator == (const char* rhsName) const
  60. {
  61. return *this == static_cast<TStringBuf>(rhsName);
  62. }
  63. TSortColumn::operator TStringBuf() const
  64. {
  65. EnsureAscending();
  66. return Name_;
  67. }
  68. TSortColumn::operator TString() const
  69. {
  70. return TString(static_cast<TStringBuf>(*this));
  71. }
  72. TSortColumn::operator std::string() const
  73. {
  74. EnsureAscending();
  75. return static_cast<std::string>(Name_);
  76. }
  77. ////////////////////////////////////////////////////////////////////////////////
  78. TSortColumns::TSortColumns()
  79. { }
  80. TSortColumns::TSortColumns(const TVector<TString>& names)
  81. {
  82. Parts_.assign(names.begin(), names.end());
  83. }
  84. TSortColumns::TSortColumns(const TColumnNames& names)
  85. : TSortColumns(names.Parts_)
  86. { }
  87. TSortColumns::operator TColumnNames() const
  88. {
  89. return TColumnNames(EnsureAscending().GetNames());
  90. }
  91. const TSortColumns& TSortColumns::EnsureAscending() const
  92. {
  93. for (const auto& sortColumn : Parts_) {
  94. sortColumn.EnsureAscending();
  95. }
  96. return *this;
  97. }
  98. TVector<TString> TSortColumns::GetNames() const
  99. {
  100. TVector<TString> names;
  101. names.reserve(Parts_.size());
  102. for (const auto& sortColumn : Parts_) {
  103. names.push_back(sortColumn.Name());
  104. }
  105. return names;
  106. }
  107. ////////////////////////////////////////////////////////////////////////////////
  108. static NTi::TTypePtr OldTypeToTypeV3(EValueType type)
  109. {
  110. switch (type) {
  111. case VT_INT64:
  112. return NTi::Int64();
  113. case VT_UINT64:
  114. return NTi::Uint64();
  115. case VT_DOUBLE:
  116. return NTi::Double();
  117. case VT_BOOLEAN:
  118. return NTi::Bool();
  119. case VT_STRING:
  120. return NTi::String();
  121. case VT_ANY:
  122. return NTi::Yson();
  123. case VT_INT8:
  124. return NTi::Int8();
  125. case VT_INT16:
  126. return NTi::Int16();
  127. case VT_INT32:
  128. return NTi::Int32();
  129. case VT_UINT8:
  130. return NTi::Uint8();
  131. case VT_UINT16:
  132. return NTi::Uint16();
  133. case VT_UINT32:
  134. return NTi::Uint32();
  135. case VT_UTF8:
  136. return NTi::Utf8();
  137. case VT_NULL:
  138. return NTi::Null();
  139. case VT_VOID:
  140. return NTi::Void();
  141. case VT_DATE:
  142. return NTi::Date();
  143. case VT_DATETIME:
  144. return NTi::Datetime();
  145. case VT_TIMESTAMP:
  146. return NTi::Timestamp();
  147. case VT_INTERVAL:
  148. return NTi::Interval();
  149. case VT_FLOAT:
  150. return NTi::Float();
  151. case VT_JSON:
  152. return NTi::Json();
  153. }
  154. }
  155. static std::pair<EValueType, bool> Simplify(const NTi::TTypePtr& type)
  156. {
  157. using namespace NTi;
  158. const auto typeName = type->GetTypeName();
  159. switch (typeName) {
  160. case ETypeName::Bool:
  161. return {VT_BOOLEAN, true};
  162. case ETypeName::Int8:
  163. return {VT_INT8, true};
  164. case ETypeName::Int16:
  165. return {VT_INT16, true};
  166. case ETypeName::Int32:
  167. return {VT_INT32, true};
  168. case ETypeName::Int64:
  169. return {VT_INT64, true};
  170. case ETypeName::Uint8:
  171. return {VT_UINT8, true};
  172. case ETypeName::Uint16:
  173. return {VT_UINT16, true};
  174. case ETypeName::Uint32:
  175. return {VT_UINT32, true};
  176. case ETypeName::Uint64:
  177. return {VT_UINT64, true};
  178. case ETypeName::Float:
  179. return {VT_FLOAT, true};
  180. case ETypeName::Double:
  181. return {VT_DOUBLE, true};
  182. case ETypeName::String:
  183. return {VT_STRING, true};
  184. case ETypeName::Utf8:
  185. return {VT_UTF8, true};
  186. case ETypeName::Date:
  187. return {VT_DATE, true};
  188. case ETypeName::Datetime:
  189. return {VT_DATETIME, true};
  190. case ETypeName::Timestamp:
  191. return {VT_TIMESTAMP, true};
  192. case ETypeName::Interval:
  193. return {VT_INTERVAL, true};
  194. case ETypeName::TzDate:
  195. case ETypeName::TzDatetime:
  196. case ETypeName::TzTimestamp:
  197. break;
  198. case ETypeName::Json:
  199. return {VT_JSON, true};
  200. case ETypeName::Decimal:
  201. return {VT_STRING, true};
  202. case ETypeName::Uuid:
  203. break;
  204. case ETypeName::Yson:
  205. return {VT_ANY, true};
  206. case ETypeName::Void:
  207. return {VT_VOID, false};
  208. case ETypeName::Null:
  209. return {VT_NULL, false};
  210. case ETypeName::Optional:
  211. {
  212. auto itemType = type->AsOptional()->GetItemType();
  213. if (itemType->IsPrimitive()) {
  214. auto simplified = Simplify(itemType->AsPrimitive());
  215. if (simplified.second) {
  216. simplified.second = false;
  217. return simplified;
  218. }
  219. }
  220. return {VT_ANY, false};
  221. }
  222. case ETypeName::List:
  223. return {VT_ANY, true};
  224. case ETypeName::Dict:
  225. return {VT_ANY, true};
  226. case ETypeName::Struct:
  227. return {VT_ANY, true};
  228. case ETypeName::Tuple:
  229. return {VT_ANY, true};
  230. case ETypeName::Variant:
  231. return {VT_ANY, true};
  232. case ETypeName::Tagged:
  233. return Simplify(type->AsTagged()->GetItemType());
  234. }
  235. ythrow TApiUsageError() << "Unsupported type: " << typeName;
  236. }
  237. NTi::TTypePtr ToTypeV3(EValueType type, bool required)
  238. {
  239. auto typeV3 = OldTypeToTypeV3(type);
  240. if (!Simplify(typeV3).second) {
  241. if (required) {
  242. ythrow TApiUsageError() << "type: " << type << " cannot be required";
  243. } else {
  244. return typeV3;
  245. }
  246. }
  247. if (required) {
  248. return typeV3;
  249. } else {
  250. return NTi::Optional(typeV3);
  251. }
  252. }
  253. TColumnSchema::TColumnSchema()
  254. : TypeV3_(NTi::Optional(NTi::Int64()))
  255. { }
  256. EValueType TColumnSchema::Type() const
  257. {
  258. return Simplify(TypeV3_).first;
  259. }
  260. TColumnSchema& TColumnSchema::Type(EValueType type) &
  261. {
  262. return Type(ToTypeV3(type, false));
  263. }
  264. TColumnSchema TColumnSchema::Type(EValueType type) &&
  265. {
  266. return Type(ToTypeV3(type, false));
  267. }
  268. TColumnSchema& TColumnSchema::Type(const NTi::TTypePtr& type) &
  269. {
  270. Y_ABORT_UNLESS(type.Get(), "Cannot create column schema with nullptr type");
  271. TypeV3_ = type;
  272. return *this;
  273. }
  274. TColumnSchema TColumnSchema::Type(const NTi::TTypePtr& type) &&
  275. {
  276. Y_ABORT_UNLESS(type.Get(), "Cannot create column schema with nullptr type");
  277. TypeV3_ = type;
  278. return *this;
  279. }
  280. TColumnSchema& TColumnSchema::TypeV3(const NTi::TTypePtr& type) &
  281. {
  282. return Type(type);
  283. }
  284. TColumnSchema TColumnSchema::TypeV3(const NTi::TTypePtr& type) &&
  285. {
  286. return Type(type);
  287. }
  288. NTi::TTypePtr TColumnSchema::TypeV3() const
  289. {
  290. return TypeV3_;
  291. }
  292. bool TColumnSchema::Required() const
  293. {
  294. return Simplify(TypeV3_).second;
  295. }
  296. TColumnSchema& TColumnSchema::Type(EValueType type, bool required) &
  297. {
  298. return Type(ToTypeV3(type, required));
  299. }
  300. TColumnSchema TColumnSchema::Type(EValueType type, bool required) &&
  301. {
  302. return Type(ToTypeV3(type, required));
  303. }
  304. bool operator==(const TColumnSchema& lhs, const TColumnSchema& rhs)
  305. {
  306. return
  307. lhs.Name() == rhs.Name() &&
  308. NTi::NEq::TStrictlyEqual()(lhs.TypeV3(), rhs.TypeV3()) &&
  309. lhs.SortOrder() == rhs.SortOrder() &&
  310. lhs.Lock() == rhs.Lock() &&
  311. lhs.Expression() == rhs.Expression() &&
  312. lhs.Aggregate() == rhs.Aggregate() &&
  313. lhs.Group() == rhs.Group();
  314. }
  315. ////////////////////////////////////////////////////////////////////////////////
  316. bool TTableSchema::Empty() const
  317. {
  318. return Columns_.empty();
  319. }
  320. TTableSchema& TTableSchema::AddColumn(const TString& name, EValueType type) &
  321. {
  322. Columns_.push_back(TColumnSchema().Name(name).Type(type));
  323. return *this;
  324. }
  325. TTableSchema TTableSchema::AddColumn(const TString& name, EValueType type) &&
  326. {
  327. return std::move(AddColumn(name, type));
  328. }
  329. TTableSchema& TTableSchema::AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) &
  330. {
  331. Columns_.push_back(TColumnSchema().Name(name).Type(type).SortOrder(sortOrder));
  332. return *this;
  333. }
  334. TTableSchema TTableSchema::AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) &&
  335. {
  336. return std::move(AddColumn(name, type, sortOrder));
  337. }
  338. TTableSchema& TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type) &
  339. {
  340. Columns_.push_back(TColumnSchema().Name(name).Type(type));
  341. return *this;
  342. }
  343. TTableSchema TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type) &&
  344. {
  345. return std::move(AddColumn(name, type));
  346. }
  347. TTableSchema& TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) &
  348. {
  349. Columns_.push_back(TColumnSchema().Name(name).Type(type).SortOrder(sortOrder));
  350. return *this;
  351. }
  352. TTableSchema TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) &&
  353. {
  354. return std::move(AddColumn(name, type, sortOrder));
  355. }
  356. TTableSchema& TTableSchema::SortBy(const TSortColumns& sortColumns) &
  357. {
  358. Y_ENSURE(sortColumns.Parts_.size() <= Columns_.size());
  359. THashMap<TString, ui64> sortColumnIndex;
  360. for (auto i: xrange(sortColumns.Parts_.size())) {
  361. Y_ENSURE(sortColumnIndex.emplace(sortColumns.Parts_[i].Name(), i).second,
  362. "Key column name '" << sortColumns.Parts_[i].Name() << "' repeats in columns list");
  363. }
  364. TVector<TColumnSchema> newColumnsSorted(sortColumns.Parts_.size());
  365. TVector<TColumnSchema> newColumnsUnsorted;
  366. for (auto& column : Columns_) {
  367. auto it = sortColumnIndex.find(column.Name());
  368. if (it == sortColumnIndex.end()) {
  369. column.ResetSortOrder();
  370. newColumnsUnsorted.push_back(std::move(column));
  371. } else {
  372. auto index = it->second;
  373. const auto& sortColumn = sortColumns.Parts_[index];
  374. column.SortOrder(sortColumn.SortOrder());
  375. newColumnsSorted[index] = std::move(column);
  376. sortColumnIndex.erase(it);
  377. }
  378. }
  379. Y_ENSURE(sortColumnIndex.empty(), "Column name '" << sortColumnIndex.begin()->first
  380. << "' not found in table schema");
  381. newColumnsSorted.insert(newColumnsSorted.end(), newColumnsUnsorted.begin(), newColumnsUnsorted.end());
  382. Columns_ = std::move(newColumnsSorted);
  383. return *this;
  384. }
  385. TTableSchema TTableSchema::SortBy(const TSortColumns& sortColumns) &&
  386. {
  387. return std::move(SortBy(sortColumns));
  388. }
  389. TVector<TColumnSchema>& TTableSchema::MutableColumns()
  390. {
  391. return Columns_;
  392. }
  393. TNode TTableSchema::ToNode() const
  394. {
  395. TNode result;
  396. TNodeBuilder builder(&result);
  397. Serialize(*this, &builder);
  398. return result;
  399. }
  400. TTableSchema TTableSchema::FromNode(const TNode& node)
  401. {
  402. TTableSchema schema;
  403. Deserialize(schema, node);
  404. return schema;
  405. }
  406. bool operator==(const TTableSchema& lhs, const TTableSchema& rhs)
  407. {
  408. return
  409. lhs.Columns() == rhs.Columns() &&
  410. lhs.Strict() == rhs.Strict() &&
  411. lhs.UniqueKeys() == rhs.UniqueKeys();
  412. }
  413. void PrintTo(const TTableSchema& schema, std::ostream* out)
  414. {
  415. (*out) << NodeToYsonString(schema.ToNode(), NYson::EYsonFormat::Pretty);
  416. }
  417. ////////////////////////////////////////////////////////////////////////////////
  418. TKeyBound::TKeyBound(ERelation relation, TKey key)
  419. : Relation_(relation)
  420. , Key_(std::move(key))
  421. { }
  422. ////////////////////////////////////////////////////////////////////////////////
  423. TTableSchema CreateTableSchema(
  424. const Descriptor& messageDescriptor,
  425. const TSortColumns& sortColumns,
  426. bool keepFieldsWithoutExtension)
  427. {
  428. auto result = CreateTableSchema(messageDescriptor, keepFieldsWithoutExtension);
  429. if (!sortColumns.Parts_.empty()) {
  430. result.SortBy(sortColumns.Parts_);
  431. }
  432. return result;
  433. }
  434. TTableSchema CreateTableSchema(NTi::TTypePtr type)
  435. {
  436. Y_ABORT_UNLESS(type);
  437. TTableSchema schema;
  438. Deserialize(schema, NodeFromYsonString(NTi::NIo::AsYtSchema(type.Get())));
  439. return schema;
  440. }
  441. ////////////////////////////////////////////////////////////////////////////////
  442. bool IsTrivial(const TReadLimit& readLimit)
  443. {
  444. return !readLimit.Key_ && !readLimit.RowIndex_ && !readLimit.Offset_ && !readLimit.TabletIndex_ && !readLimit.KeyBound_;
  445. }
  446. EValueType NodeTypeToValueType(TNode::EType nodeType)
  447. {
  448. switch (nodeType) {
  449. case TNode::EType::Int64: return VT_INT64;
  450. case TNode::EType::Uint64: return VT_UINT64;
  451. case TNode::EType::String: return VT_STRING;
  452. case TNode::EType::Double: return VT_DOUBLE;
  453. case TNode::EType::Bool: return VT_BOOLEAN;
  454. default:
  455. ythrow yexception() << "Cannot convert TNode type " << nodeType << " to EValueType";
  456. }
  457. }
  458. ////////////////////////////////////////////////////////////////////////////////
  459. const TVector<TReadRange>& GetRangesCompat(const TRichYPath& path)
  460. {
  461. static const TVector<TReadRange> empty;
  462. const auto& maybeRanges = path.GetRanges();
  463. if (maybeRanges.Empty()) {
  464. return empty;
  465. } else if (maybeRanges->size() > 0) {
  466. return *maybeRanges;
  467. } else {
  468. // If you see this exception, that means that caller of this function doesn't known what to do
  469. // with RichYPath that has set range list, but the range list is empty.
  470. //
  471. // To avoid this exception caller must explicitly handle such case.
  472. // NB. YT-17683
  473. ythrow TApiUsageError() << "Unsupported RichYPath: explicitly empty range list";
  474. }
  475. }
  476. ////////////////////////////////////////////////////////////////////////////////
  477. namespace NDetail {
  478. ////////////////////////////////////////////////////////////////////////////////
  479. TString ToString(EValueType type)
  480. {
  481. switch (type) {
  482. case VT_INT8:
  483. return "int8";
  484. case VT_INT16:
  485. return "int16";
  486. case VT_INT32:
  487. return "int32";
  488. case VT_INT64:
  489. return "int64";
  490. case VT_UINT8:
  491. return "uint8";
  492. case VT_UINT16:
  493. return "uint16";
  494. case VT_UINT32:
  495. return "uint32";
  496. case VT_UINT64:
  497. return "uint64";
  498. case VT_DOUBLE:
  499. return "double";
  500. case VT_BOOLEAN:
  501. return "boolean";
  502. case VT_STRING:
  503. return "string";
  504. case VT_UTF8:
  505. return "utf8";
  506. case VT_ANY:
  507. return "any";
  508. case VT_NULL:
  509. return "null";
  510. case VT_VOID:
  511. return "void";
  512. case VT_DATE:
  513. return "date";
  514. case VT_DATETIME:
  515. return "datetime";
  516. case VT_TIMESTAMP:
  517. return "timestamp";
  518. case VT_INTERVAL:
  519. return "interval";
  520. case VT_FLOAT:
  521. return "float";
  522. case VT_JSON:
  523. return "json";
  524. }
  525. ythrow yexception() << "Invalid value type " << static_cast<int>(type);
  526. }
  527. ////////////////////////////////////////////////////////////////////////////////
  528. } // namespace NDetail
  529. } // namespace NYT
  530. template <>
  531. void Out<NYT::TSortColumn>(IOutputStream& os, const NYT::TSortColumn& sortColumn)
  532. {
  533. if (sortColumn.SortOrder() == NYT::ESortOrder::SO_ASCENDING) {
  534. os << sortColumn.Name();
  535. } else {
  536. os << NYT::BuildYsonStringFluently(NYson::EYsonFormat::Text).Value(sortColumn);
  537. }
  538. }