executor.cpp 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064
  #include "executor.h"
  #include <yql/essentials/minikql/jsonpath/parser/parse_double.h>
  #include <yql/essentials/core/issue/protos/issue_id.pb.h>
  #include <yql/essentials/minikql/dom/node.h>
  #include <util/generic/scope.h>
  #include <util/generic/maybe.h>
  #include <util/system/compiler.h>
  #include <cmath>
  #include <limits>
  9. namespace NYql::NJsonPath {
  10. using namespace NJson;
  11. using namespace NUdf;
  12. using namespace NDom;
  13. namespace {
  14. bool IsObjectOrArray(const TValue& value) {
  15. return value.IsArray() || value.IsObject();
  16. }
  17. TIssue MakeError(TPosition pos, TIssueCode code, const TStringBuf message) {
  18. TIssue error(pos, message);
  19. error.SetCode(code, TSeverityIds::S_ERROR);
  20. return error;
  21. }
  22. TIssue MakeError(const TJsonPathItem& item, TIssueCode code, const TStringBuf message) {
  23. return MakeError(item.Pos, code, message);
  24. }
  25. }
  26. TResult::TResult(TJsonNodes&& nodes)
  27. : Result(std::move(nodes))
  28. {
  29. }
  30. TResult::TResult(const TJsonNodes& nodes)
  31. : Result(nodes)
  32. {
  33. }
  34. TResult::TResult(TIssue&& issue)
  35. : Result(std::move(issue))
  36. {
  37. }
  38. const TJsonNodes& TResult::GetNodes() const {
  39. return std::get<TJsonNodes>(Result);
  40. }
  41. TJsonNodes& TResult::GetNodes() {
  42. return std::get<TJsonNodes>(Result);
  43. }
  44. const TIssue& TResult::GetError() const {
  45. return std::get<TIssue>(Result);
  46. }
  47. bool TResult::IsError() const {
  48. return std::holds_alternative<TIssue>(Result);
  49. }
  50. TExecutor::TExecutor(
  51. const TJsonPathPtr path,
  52. const TJsonNodes& input,
  53. const TVariablesMap& variables,
  54. const IValueBuilder* valueBuilder)
  55. : Reader(path)
  56. , Input(input)
  57. , Variables(variables)
  58. , ValueBuilder(valueBuilder)
  59. {
  60. }
  61. bool TExecutor::IsZero(double value) {
  62. return -EPSILON <= value && value <= EPSILON;
  63. }
  64. bool TExecutor::IsLess(double a, double b) {
  65. return (b - a) > EPSILON;
  66. }
  67. bool TExecutor::IsGreater(double a, double b) {
  68. return (a - b) > EPSILON;
  69. }
  70. bool TExecutor::IsEqual(double a, double b) {
  71. return IsZero(a - b);
  72. }
  73. bool TExecutor::IsStrict() const {
  74. return Reader.GetMode() == EJsonPathMode::Strict;
  75. }
  76. bool TExecutor::IsLax() const {
  77. return Reader.GetMode() == EJsonPathMode::Lax;
  78. }
  79. TResult TExecutor::Execute() {
  80. return Execute(Reader.ReadFirst());
  81. }
  82. TResult TExecutor::Execute(const TJsonPathItem& item) {
  83. switch (item.Type) {
  84. case EJsonPathItemType::MemberAccess:
  85. return MemberAccess(item);
  86. case EJsonPathItemType::WildcardMemberAccess:
  87. return WildcardMemberAccess(item);
  88. case EJsonPathItemType::ContextObject:
  89. return ContextObject();
  90. case EJsonPathItemType::Variable:
  91. return Variable(item);
  92. case EJsonPathItemType::NumberLiteral:
  93. return NumberLiteral(item);
  94. case EJsonPathItemType::ArrayAccess:
  95. return ArrayAccess(item);
  96. case EJsonPathItemType::WildcardArrayAccess:
  97. return WildcardArrayAccess(item);
  98. case EJsonPathItemType::LastArrayIndex:
  99. return LastArrayIndex(item);
  100. case EJsonPathItemType::UnaryMinus:
  101. case EJsonPathItemType::UnaryPlus:
  102. return UnaryArithmeticOp(item);
  103. case EJsonPathItemType::BinaryAdd:
  104. case EJsonPathItemType::BinarySubstract:
  105. case EJsonPathItemType::BinaryMultiply:
  106. case EJsonPathItemType::BinaryDivide:
  107. case EJsonPathItemType::BinaryModulo:
  108. return BinaryArithmeticOp(item);
  109. case EJsonPathItemType::BinaryAnd:
  110. case EJsonPathItemType::BinaryOr:
  111. return BinaryLogicalOp(item);
  112. case EJsonPathItemType::UnaryNot:
  113. return UnaryLogicalOp(item);
  114. case EJsonPathItemType::BooleanLiteral:
  115. return BooleanLiteral(item);
  116. case EJsonPathItemType::NullLiteral:
  117. return NullLiteral();
  118. case EJsonPathItemType::StringLiteral:
  119. return StringLiteral(item);
  120. case EJsonPathItemType::FilterObject:
  121. return FilterObject(item);
  122. case EJsonPathItemType::FilterPredicate:
  123. return FilterPredicate(item);
  124. case EJsonPathItemType::BinaryLess:
  125. case EJsonPathItemType::BinaryLessEqual:
  126. case EJsonPathItemType::BinaryGreater:
  127. case EJsonPathItemType::BinaryGreaterEqual:
  128. case EJsonPathItemType::BinaryEqual:
  129. case EJsonPathItemType::BinaryNotEqual:
  130. return CompareOp(item);
  131. case EJsonPathItemType::AbsMethod:
  132. case EJsonPathItemType::FloorMethod:
  133. case EJsonPathItemType::CeilingMethod:
  134. return NumericMethod(item);
  135. case EJsonPathItemType::DoubleMethod:
  136. return DoubleMethod(item);
  137. case EJsonPathItemType::TypeMethod:
  138. return TypeMethod(item);
  139. case EJsonPathItemType::SizeMethod:
  140. return SizeMethod(item);
  141. case EJsonPathItemType::KeyValueMethod:
  142. return KeyValueMethod(item);
  143. case EJsonPathItemType::StartsWithPredicate:
  144. return StartsWithPredicate(item);
  145. case EJsonPathItemType::IsUnknownPredicate:
  146. return IsUnknownPredicate(item);
  147. case EJsonPathItemType::ExistsPredicate:
  148. return ExistsPredicate(item);
  149. case EJsonPathItemType::LikeRegexPredicate:
  150. return LikeRegexPredicate(item);
  151. }
  152. }
  153. TResult TExecutor::ContextObject() {
  154. return Input;
  155. }
  156. TResult TExecutor::Variable(const TJsonPathItem& item) {
  157. const auto it = Variables.find(item.GetString());
  158. if (it == Variables.end()) {
  159. return MakeError(item, TIssuesIds::JSONPATH_UNDEFINED_VARIABLE, TStringBuilder() << "Undefined variable '" << item.GetString() << "'");
  160. }
  161. return TJsonNodes({it->second});
  162. }
  163. TResult TExecutor::LastArrayIndex(const TJsonPathItem& item) {
  164. if (ArraySubscriptSource.empty()) {
  165. return MakeError(item, TIssuesIds::JSONPATH_LAST_OUTSIDE_OF_ARRAY_SUBSCRIPT, "'last' is only allowed inside array subscripts");
  166. }
  167. const auto& array = ArraySubscriptSource.top();
  168. const i64 arraySize = array.GetSize();
  169. // NOTE: For empty arrays `last` equals `-1`. This is intended, PostgreSQL 12 has the same behaviour
  170. return TJsonNodes({TValue(MakeDouble(static_cast<double>(arraySize - 1)))});
  171. }
  172. TResult TExecutor::NumberLiteral(const TJsonPathItem& item) {
  173. return TJsonNodes({TValue(MakeDouble(item.GetNumber()))});
  174. }
  175. TResult TExecutor::MemberAccess(const TJsonPathItem& item) {
  176. const auto input = Execute(Reader.ReadInput(item));
  177. if (input.IsError()) {
  178. return input;
  179. }
  180. TJsonNodes result;
  181. for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) {
  182. if (!node.IsObject()) {
  183. if (IsStrict()) {
  184. return MakeError(item, TIssuesIds::JSONPATH_EXPECTED_OBJECT, "Expected object");
  185. } else {
  186. continue;
  187. }
  188. }
  189. if (const auto payload = node.Lookup(item.GetString())) {
  190. result.push_back(*payload);
  191. continue;
  192. }
  193. if (IsStrict()) {
  194. return MakeError(item, TIssuesIds::JSONPATH_MEMBER_NOT_FOUND, "Member not found");
  195. }
  196. }
  197. return std::move(result);
  198. }
  199. TResult TExecutor::WildcardMemberAccess(const TJsonPathItem& item) {
  200. const auto input = Execute(Reader.ReadInput(item));
  201. if (input.IsError()) {
  202. return input;
  203. }
  204. TJsonNodes result;
  205. for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) {
  206. if (!node.IsObject()) {
  207. if (IsStrict()) {
  208. return MakeError(item, TIssuesIds::JSONPATH_EXPECTED_OBJECT, "Expected object");
  209. } else {
  210. continue;
  211. }
  212. }
  213. TValue key;
  214. TValue value;
  215. auto it = node.GetObjectIterator();
  216. while (it.Next(key, value)) {
  217. result.push_back(value);
  218. }
  219. }
  220. return std::move(result);
  221. }
  222. TMaybe<TIssue> TExecutor::EnsureSingleSubscript(TPosition pos, const TJsonNodes& index, i64& result) {
  223. if (index.size() != 1) {
  224. return MakeError(pos, TIssuesIds::JSONPATH_INVALID_ARRAY_INDEX, "Expected single number item for array index");
  225. }
  226. const auto& indexValue = index[0];
  227. if (!indexValue.IsNumber()) {
  228. return MakeError(pos, TIssuesIds::JSONPATH_INVALID_ARRAY_INDEX, "Array index must be number");
  229. }
  230. result = static_cast<i64>(std::floor(indexValue.GetNumber()));
  231. return Nothing();
  232. }
  233. TMaybe<TIssue> TExecutor::EnsureArraySubscripts(const TJsonPathItem& item, TVector<TArraySubscript>& result) {
  234. for (const auto& subscript : item.GetSubscripts()) {
  235. const auto& fromItem = Reader.ReadFromSubscript(subscript);
  236. const auto fromResult = Execute(fromItem);
  237. if (fromResult.IsError()) {
  238. return fromResult.GetError();
  239. }
  240. i64 fromIndex = 0;
  241. TMaybe<TIssue> error = EnsureSingleSubscript(fromItem.Pos, fromResult.GetNodes(), fromIndex);
  242. if (error) {
  243. return error;
  244. }
  245. if (!subscript.IsRange()) {
  246. result.emplace_back(fromIndex, fromItem.Pos);
  247. continue;
  248. }
  249. const auto& toItem = Reader.ReadToSubscript(subscript);
  250. const auto toResult = Execute(toItem);
  251. if (toResult.IsError()) {
  252. return toResult.GetError();
  253. }
  254. i64 toIndex = 0;
  255. error = EnsureSingleSubscript(toItem.Pos, toResult.GetNodes(), toIndex);
  256. if (error) {
  257. return error;
  258. }
  259. result.emplace_back(fromIndex, fromItem.Pos, toIndex, toItem.Pos);
  260. }
  261. return Nothing();
  262. }
  263. TResult TExecutor::ArrayAccess(const TJsonPathItem& item) {
  264. const auto input = Execute(Reader.ReadInput(item));
  265. if (input.IsError()) {
  266. return input;
  267. }
  268. TJsonNodes result;
  269. for (const auto& node : OptionalArrayWrapNodes(input.GetNodes())) {
  270. if (!node.IsArray()) {
  271. return MakeError(item, TIssuesIds::JSONPATH_EXPECTED_ARRAY, "Expected array");
  272. }
  273. ArraySubscriptSource.push(node);
  274. Y_DEFER {
  275. ArraySubscriptSource.pop();
  276. };
  277. // Check for "hard" errors in array subscripts. These are forbidden even in lax mode
  278. // NOTE: We intentionally execute subscripts expressions for each array in the input
  279. // because they can contain `last` keyword which value is different for each array
  280. TVector<TArraySubscript> subscripts;
  281. TMaybe<TIssue> error = EnsureArraySubscripts(item, subscripts);
  282. if (error) {
  283. return std::move(*error);
  284. }
  285. const ui64 arraySize = node.GetSize();
  286. for (const auto& idx : subscripts) {
  287. // Check bounds for first subscript
  288. if (idx.GetFrom() < 0 || idx.GetFrom() >= static_cast<i64>(arraySize)) {
  289. if (IsStrict()) {
  290. return MakeError(idx.GetFromPos(), TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS, "Array index out of bounds");
  291. } else {
  292. continue;
  293. }
  294. }
  295. // If there is no second subcripts, just return corresponding array element
  296. if (!idx.IsRange()) {
  297. result.push_back(node.GetElement(idx.GetFrom()));
  298. continue;
  299. }
  300. // Check bounds for second subscript
  301. if (idx.GetTo() < 0 || idx.GetTo() >= static_cast<i64>(arraySize)) {
  302. if (IsStrict()) {
  303. return MakeError(idx.GetToPos(), TIssuesIds::JSONPATH_ARRAY_INDEX_OUT_OF_BOUNDS, "Array index out of bounds");
  304. } else {
  305. continue;
  306. }
  307. }
  308. // In strict mode invalid ranges are forbidden
  309. if (idx.GetFrom() > idx.GetTo() && IsStrict()) {
  310. return MakeError(idx.GetFromPos(), TIssuesIds::JSONPATH_INVALID_ARRAY_INDEX_RANGE, "Range lower bound is greater than upper bound");
  311. }
  312. for (i64 i = idx.GetFrom(); i <= idx.GetTo(); i++) {
  313. result.push_back(node.GetElement(i));
  314. }
  315. }
  316. }
  317. return std::move(result);
  318. }
  319. TResult TExecutor::WildcardArrayAccess(const TJsonPathItem& item) {
  320. const auto input = Execute(Reader.ReadInput(item));
  321. if (input.IsError()) {
  322. return input;
  323. }
  324. TJsonNodes result;
  325. for (const auto& node : OptionalArrayWrapNodes(input.GetNodes())) {
  326. if (!node.IsArray()) {
  327. return MakeError(item, TIssuesIds::JSONPATH_EXPECTED_ARRAY, "Expected array");
  328. }
  329. auto it = node.GetArrayIterator();
  330. TValue value;
  331. while (it.Next(value)) {
  332. result.push_back(value);
  333. }
  334. }
  335. return std::move(result);
  336. }
  337. TResult TExecutor::UnaryArithmeticOp(const TJsonPathItem& item) {
  338. const auto& operandItem = Reader.ReadInput(item);
  339. const auto operandsResult = Execute(operandItem);
  340. if (operandsResult.IsError()) {
  341. return operandsResult;
  342. }
  343. const auto& operands = operandsResult.GetNodes();
  344. TJsonNodes result;
  345. result.reserve(operands.size());
  346. for (const auto& operand : operands) {
  347. if (!operand.IsNumber()) {
  348. return MakeError(
  349. operandItem, TIssuesIds::JSONPATH_INVALID_UNARY_OPERATION_ARGUMENT_TYPE,
  350. TStringBuilder() << "Unsupported type for unary operations"
  351. );
  352. }
  353. if (item.Type == EJsonPathItemType::UnaryPlus) {
  354. result.push_back(operand);
  355. continue;
  356. }
  357. const auto value = operand.GetNumber();
  358. result.push_back(TValue(MakeDouble(-value)));
  359. }
  360. return std::move(result);
  361. }
  362. TMaybe<TIssue> TExecutor::EnsureBinaryArithmeticOpArgument(TPosition pos, const TJsonNodes& nodes, double& result) {
  363. if (nodes.size() != 1) {
  364. return MakeError(pos, TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT, "Expected exactly 1 item as an operand for binary operation");
  365. }
  366. const auto& value = nodes[0];
  367. if (!value.IsNumber()) {
  368. return MakeError(
  369. pos, TIssuesIds::JSONPATH_INVALID_BINARY_OPERATION_ARGUMENT_TYPE,
  370. TStringBuilder() << "Unsupported type for binary operations"
  371. );
  372. }
  373. result = value.GetNumber();
  374. return Nothing();
  375. }
  376. TResult TExecutor::BinaryArithmeticOp(const TJsonPathItem& item) {
  377. const auto& leftItem = Reader.ReadLeftOperand(item);
  378. const auto leftResult = Execute(leftItem);
  379. if (leftResult.IsError()) {
  380. return leftResult;
  381. }
  382. double left = 0;
  383. TMaybe<TIssue> error = EnsureBinaryArithmeticOpArgument(leftItem.Pos, leftResult.GetNodes(), left);
  384. if (error) {
  385. return std::move(*error);
  386. }
  387. const auto& rightItem = Reader.ReadRightOperand(item);
  388. const auto rightResult = Execute(rightItem);
  389. if (rightResult.IsError()) {
  390. return rightResult;
  391. }
  392. double right = 0;
  393. error = EnsureBinaryArithmeticOpArgument(rightItem.Pos, rightResult.GetNodes(), right);
  394. if (error) {
  395. return std::move(*error);
  396. }
  397. double result = 0;
  398. switch (item.Type) {
  399. case EJsonPathItemType::BinaryAdd:
  400. result = left + right;
  401. break;
  402. case EJsonPathItemType::BinarySubstract:
  403. result = left - right;
  404. break;
  405. case EJsonPathItemType::BinaryMultiply:
  406. result = left * right;
  407. break;
  408. case EJsonPathItemType::BinaryDivide:
  409. if (IsZero(right)) {
  410. return MakeError(rightItem, TIssuesIds::JSONPATH_DIVISION_BY_ZERO, "Division by zero");
  411. }
  412. result = left / right;
  413. break;
  414. case EJsonPathItemType::BinaryModulo:
  415. if (IsZero(right)) {
  416. return MakeError(rightItem, TIssuesIds::JSONPATH_DIVISION_BY_ZERO, "Division by zero");
  417. }
  418. result = std::fmod(left, right);
  419. break;
  420. default:
  421. YQL_ENSURE(false, "Expected binary arithmetic operation");
  422. }
  423. if (Y_UNLIKELY(std::isinf(result))) {
  424. return MakeError(item, TIssuesIds::JSONPATH_BINARY_OPERATION_RESULT_INFINITY, "Binary operation result is infinity");
  425. }
  426. return TJsonNodes({TValue(MakeDouble(result))});
  427. }
  428. TMaybe<TIssue> TExecutor::EnsureLogicalOpArgument(TPosition pos, const TJsonNodes& nodes, TMaybe<bool>& result) {
  429. if (nodes.size() != 1) {
  430. return MakeError(pos, TIssuesIds::JSONPATH_INVALID_LOGICAL_OPERATION_ARGUMENT, "Expected exactly 1 item as an operand for logical operation");
  431. }
  432. const auto& value = nodes[0];
  433. if (value.IsNull()) {
  434. result = Nothing();
  435. } else if (value.IsBool()) {
  436. result = value.GetBool();
  437. } else {
  438. return MakeError(pos, TIssuesIds::JSONPATH_INVALID_LOGICAL_OPERATION_ARGUMENT, "Unsupported type for logical operation");
  439. }
  440. return Nothing();
  441. }
  442. TResult TExecutor::BinaryLogicalOp(const TJsonPathItem& item) {
  443. const auto& leftItem = Reader.ReadLeftOperand(item);
  444. const auto leftResult = Execute(leftItem);
  445. if (leftResult.IsError()) {
  446. return leftResult;
  447. }
  448. TMaybe<bool> left;
  449. TMaybe<TIssue> error = EnsureLogicalOpArgument(leftItem.Pos, leftResult.GetNodes(), left);
  450. if (error) {
  451. return std::move(*error);
  452. }
  453. const auto& rightItem = Reader.ReadRightOperand(item);
  454. const auto rightResult = Execute(rightItem);
  455. if (rightResult.IsError()) {
  456. return rightResult;
  457. }
  458. TMaybe<bool> right;
  459. error = EnsureLogicalOpArgument(rightItem.Pos, rightResult.GetNodes(), right);
  460. if (error) {
  461. return std::move(*error);
  462. }
  463. switch (item.Type) {
  464. case EJsonPathItemType::BinaryAnd: {
  465. /*
  466. AND truth table (taken from SQL JSON standard)
  467. | && | true | false | null |
  468. | ----- | ----- | ----- | ----- |
  469. | true | true | false | null |
  470. | false | false | false | false |
  471. | null | null | false | null |
  472. */
  473. if (left.Defined() && right.Defined()) {
  474. return TJsonNodes({TValue(MakeBool(*left && *right))});
  475. }
  476. const bool falseVsNull = !left.GetOrElse(true) && !right.Defined();
  477. const bool nullVsFalse = !right.GetOrElse(true) && !left.Defined();
  478. if (falseVsNull || nullVsFalse) {
  479. return TJsonNodes({TValue(MakeBool(false))});
  480. }
  481. return TJsonNodes({TValue(MakeEntity())});
  482. }
  483. case EJsonPathItemType::BinaryOr: {
  484. /*
  485. OR truth table (taken from SQL JSON standard)
  486. | || | true | false | null |
  487. | ----- | ----- | ----- | ----- |
  488. | true | true | true | true |
  489. | false | true | false | null |
  490. | null | true | null | null |
  491. */
  492. if (left.Defined() && right.Defined()) {
  493. return TJsonNodes({TValue(MakeBool(*left || *right))});
  494. }
  495. const bool trueVsNull = left.GetOrElse(false) && !right.Defined();
  496. const bool nullVsTrue = right.GetOrElse(false) && !left.Defined();
  497. if (trueVsNull || nullVsTrue) {
  498. return TJsonNodes({TValue(MakeBool(true))});
  499. }
  500. return TJsonNodes({TValue(MakeEntity())});
  501. }
  502. default:
  503. YQL_ENSURE(false, "Expected binary logical operation");
  504. }
  505. }
  506. TResult TExecutor::UnaryLogicalOp(const TJsonPathItem& item) {
  507. /*
  508. NOT truth table (taken from SQL JSON standard)
  509. | x | !x |
  510. | ----- | ----- |
  511. | true | false |
  512. | false | true |
  513. | null | null |
  514. */
  515. const auto& operandItem = Reader.ReadInput(item);
  516. const auto operandResult = Execute(operandItem);
  517. if (operandResult.IsError()) {
  518. return operandResult;
  519. }
  520. TMaybe<bool> operand;
  521. TMaybe<TIssue> error = EnsureLogicalOpArgument(operandItem.Pos, operandResult.GetNodes(), operand);
  522. if (error) {
  523. return std::move(*error);
  524. }
  525. if (!operand.Defined()) {
  526. return TJsonNodes({TValue(MakeEntity())});
  527. }
  528. return TJsonNodes({TValue(MakeBool(!(*operand)))});
  529. }
  530. TResult TExecutor::BooleanLiteral(const TJsonPathItem& item) {
  531. return TJsonNodes({TValue(MakeBool(item.GetBoolean()))});
  532. }
  533. TResult TExecutor::NullLiteral() {
  534. return TJsonNodes({TValue(MakeEntity())});
  535. }
  536. TResult TExecutor::StringLiteral(const TJsonPathItem& item) {
  537. return TJsonNodes({TValue(MakeString(item.GetString(), ValueBuilder))});
  538. }
  539. TMaybe<bool> TExecutor::CompareValues(const TValue& left, const TValue& right, EJsonPathItemType operation) {
  540. if (IsObjectOrArray(left) || IsObjectOrArray(right)) {
  541. // Comparisons of objects and arrays are prohibited
  542. return Nothing();
  543. }
  544. if (left.IsNull() && right.IsNull()) {
  545. // null == null is true, but all other comparisons are false
  546. return operation == EJsonPathItemType::BinaryEqual;
  547. }
  548. if (left.IsNull() || right.IsNull()) {
  549. // All operations between null and non-null are false
  550. return false;
  551. }
  552. auto doCompare = [&operation](const auto& left, const auto& right) {
  553. switch (operation) {
  554. case EJsonPathItemType::BinaryEqual:
  555. return left == right;
  556. case EJsonPathItemType::BinaryNotEqual:
  557. return left != right;
  558. case EJsonPathItemType::BinaryLess:
  559. return left < right;
  560. case EJsonPathItemType::BinaryLessEqual:
  561. return left <= right;
  562. case EJsonPathItemType::BinaryGreater:
  563. return left > right;
  564. case EJsonPathItemType::BinaryGreaterEqual:
  565. return left >= right;
  566. default:
  567. YQL_ENSURE(false, "Expected compare operation");
  568. }
  569. };
  570. if (left.IsBool() && right.IsBool()) {
  571. return doCompare(left.GetBool(), right.GetBool());
  572. } else if (left.IsString() && right.IsString()) {
  573. // NOTE: Strings are compared as byte arrays.
  574. // YQL does the same thing for UTF-8 strings and according to SQL/JSON
  575. // standard JsonPath must use the same semantics.
  576. //
  577. // However this is not correct in logical meaning. Let us consider strings:
  578. // - U+00e9 (LATIN SMALL LETTER E WITH ACUTE), 'é'
  579. // - U+0065 (LATIN SMALL LETTER E) U+0301 (COMBINING ACUTE ACCENT), `é`
  580. // Even though these two strings are different byte sequences, they are identical
  581. // from UTF-8 perspective.
  582. return doCompare(left.GetString(), right.GetString());
  583. }
  584. if (!left.IsNumber() || !right.IsNumber()) {
  585. return Nothing();
  586. }
  587. const auto leftNumber = left.GetNumber();
  588. const auto rightNumber = right.GetNumber();
  589. switch (operation) {
  590. case EJsonPathItemType::BinaryEqual:
  591. return IsEqual(leftNumber, rightNumber);
  592. case EJsonPathItemType::BinaryNotEqual:
  593. return !IsEqual(leftNumber, rightNumber);
  594. case EJsonPathItemType::BinaryLess:
  595. return IsLess(leftNumber, rightNumber);
  596. case EJsonPathItemType::BinaryLessEqual:
  597. return !IsGreater(leftNumber, rightNumber);
  598. case EJsonPathItemType::BinaryGreater:
  599. return IsGreater(leftNumber, rightNumber);
  600. case EJsonPathItemType::BinaryGreaterEqual:
  601. return !IsLess(leftNumber, rightNumber);
  602. default:
  603. YQL_ENSURE(false, "Expected compare operation");
  604. }
  605. }
  606. TResult TExecutor::CompareOp(const TJsonPathItem& item) {
  607. const auto& leftItem = Reader.ReadLeftOperand(item);
  608. const auto leftResult = Execute(leftItem);
  609. if (leftResult.IsError()) {
  610. return TJsonNodes({TValue(MakeEntity())});
  611. }
  612. const auto& rightItem = Reader.ReadRightOperand(item);
  613. const auto rightResult = Execute(rightItem);
  614. if (rightResult.IsError()) {
  615. return TJsonNodes({TValue(MakeEntity())});
  616. }
  617. const auto leftNodes = OptionalUnwrapArrays(leftResult.GetNodes());
  618. const auto rightNodes = OptionalUnwrapArrays(rightResult.GetNodes());
  619. bool error = false;
  620. bool found = false;
  621. for (const auto& left : leftNodes) {
  622. for (const auto& right : rightNodes) {
  623. const auto result = CompareValues(left, right, item.Type);
  624. if (!result.Defined()) {
  625. error = true;
  626. } else {
  627. found |= *result;
  628. }
  629. if (IsLax() && (error || found)) {
  630. break;
  631. }
  632. }
  633. if (IsLax() && (error || found)) {
  634. break;
  635. }
  636. }
  637. if (error) {
  638. return TJsonNodes({TValue(MakeEntity())});
  639. }
  640. return TJsonNodes({TValue(MakeBool(found))});
  641. }
  642. TResult TExecutor::FilterObject(const TJsonPathItem& item) {
  643. if (CurrentFilterObject.empty()) {
  644. return MakeError(item, TIssuesIds::JSONPATH_FILTER_OBJECT_OUTSIDE_OF_FILTER, "'@' is only allowed inside filters");
  645. }
  646. return TJsonNodes({CurrentFilterObject.top()});
  647. }
  648. TResult TExecutor::FilterPredicate(const TJsonPathItem& item) {
  649. const auto input = Execute(Reader.ReadInput(item));
  650. if (input.IsError()) {
  651. return input;
  652. }
  653. const auto& predicateItem = Reader.ReadFilterPredicate(item);
  654. TJsonNodes result;
  655. for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) {
  656. CurrentFilterObject.push(node);
  657. Y_DEFER {
  658. CurrentFilterObject.pop();
  659. };
  660. const auto predicateResult = Execute(predicateItem);
  661. if (predicateResult.IsError()) {
  662. continue;
  663. }
  664. const auto& predicateNodes = predicateResult.GetNodes();
  665. if (predicateNodes.size() != 1) {
  666. continue;
  667. }
  668. const auto& value = predicateNodes[0];
  669. if (value.IsBool() && value.GetBool()) {
  670. result.push_back(node);
  671. continue;
  672. }
  673. }
  674. return std::move(result);
  675. }
  676. TResult TExecutor::NumericMethod(const TJsonPathItem& item) {
  677. const auto& input = Execute(Reader.ReadInput(item));
  678. if (input.IsError()) {
  679. return input;
  680. }
  681. TJsonNodes result;
  682. for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) {
  683. if (!node.IsNumber()) {
  684. return MakeError(item, TIssuesIds::JSONPATH_INVALID_NUMERIC_METHOD_ARGUMENT, "Unsupported type for numeric method");
  685. }
  686. double applied = node.GetNumber();
  687. switch (item.Type) {
  688. case EJsonPathItemType::AbsMethod:
  689. applied = std::fabs(applied);
  690. break;
  691. case EJsonPathItemType::FloorMethod:
  692. applied = std::floor(applied);
  693. break;
  694. case EJsonPathItemType::CeilingMethod:
  695. applied = std::ceil(applied);
  696. break;
  697. default:
  698. YQL_ENSURE(false, "Expected numeric method");
  699. }
  700. result.push_back(TValue(MakeDouble(applied)));
  701. }
  702. return std::move(result);
  703. }
  704. TResult TExecutor::DoubleMethod(const TJsonPathItem& item) {
  705. const auto& input = Execute(Reader.ReadInput(item));
  706. if (input.IsError()) {
  707. return input;
  708. }
  709. TJsonNodes result;
  710. for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) {
  711. if (!node.IsString()) {
  712. return MakeError(item, TIssuesIds::JSONPATH_INVALID_DOUBLE_METHOD_ARGUMENT, "Unsupported type for double() method");
  713. }
  714. const double parsed = ParseDouble(node.GetString());
  715. if (std::isnan(parsed)) {
  716. return MakeError(item, TIssuesIds::JSONPATH_INVALID_NUMBER_STRING, "Error parsing number from string");
  717. }
  718. if (std::isinf(parsed)) {
  719. return MakeError(item, TIssuesIds::JSONPATH_INFINITE_NUMBER_STRING, "Parsed number is infinity");
  720. }
  721. result.push_back(TValue(MakeDouble(parsed)));
  722. }
  723. return std::move(result);
  724. }
  725. TResult TExecutor::TypeMethod(const TJsonPathItem& item) {
  726. const auto& input = Execute(Reader.ReadInput(item));
  727. if (input.IsError()) {
  728. return input;
  729. }
  730. TJsonNodes result;
  731. for (const auto& node : input.GetNodes()) {
  732. TStringBuf type;
  733. switch (node.GetType()) {
  734. case EValueType::Null:
  735. type = "null";
  736. break;
  737. case EValueType::Bool:
  738. type = "boolean";
  739. break;
  740. case EValueType::Number:
  741. type = "number";
  742. break;
  743. case EValueType::String:
  744. type = "string";
  745. break;
  746. case EValueType::Array:
  747. type = "array";
  748. break;
  749. case EValueType::Object:
  750. type = "object";
  751. break;
  752. }
  753. result.push_back(TValue(MakeString(type, ValueBuilder)));
  754. }
  755. return std::move(result);
  756. }
  757. TResult TExecutor::SizeMethod(const TJsonPathItem& item) {
  758. const auto& input = Execute(Reader.ReadInput(item));
  759. if (input.IsError()) {
  760. return input;
  761. }
  762. TJsonNodes result;
  763. for (const auto& node : input.GetNodes()) {
  764. ui64 size = 1;
  765. if (node.IsArray()) {
  766. size = node.GetSize();
  767. }
  768. result.push_back(TValue(MakeDouble(static_cast<double>(size))));
  769. }
  770. return std::move(result);
  771. }
  772. TResult TExecutor::KeyValueMethod(const TJsonPathItem& item) {
  773. const auto& input = Execute(Reader.ReadInput(item));
  774. if (input.IsError()) {
  775. return input;
  776. }
  777. TJsonNodes result;
  778. TPair row[2];
  779. TPair& nameEntry = row[0];
  780. TPair& valueEntry = row[1];
  781. for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) {
  782. if (!node.IsObject()) {
  783. return MakeError(item, TIssuesIds::JSONPATH_INVALID_KEYVALUE_METHOD_ARGUMENT, "Unsupported type for keyvalue() method");
  784. }
  785. TValue key;
  786. TValue value;
  787. auto it = node.GetObjectIterator();
  788. while (it.Next(key, value)) {
  789. nameEntry.first = MakeString("name", ValueBuilder);
  790. nameEntry.second = key.ConvertToUnboxedValue(ValueBuilder);
  791. valueEntry.first = MakeString("value", ValueBuilder);
  792. valueEntry.second = value.ConvertToUnboxedValue(ValueBuilder);
  793. result.push_back(TValue(MakeDict(row, 2)));
  794. }
  795. }
  796. return std::move(result);
  797. }
  798. TResult TExecutor::StartsWithPredicate(const TJsonPathItem& item) {
  799. const auto& input = Execute(Reader.ReadInput(item));
  800. if (input.IsError()) {
  801. return input;
  802. }
  803. const auto& inputNodes = input.GetNodes();
  804. if (inputNodes.size() != 1) {
  805. return MakeError(item, TIssuesIds::JSONPATH_INVALID_STARTS_WITH_ARGUMENT, "Expected exactly 1 item as input argument for starts with predicate");
  806. }
  807. const auto& inputString = inputNodes[0];
  808. if (!inputString.IsString()) {
  809. return MakeError(item, TIssuesIds::JSONPATH_INVALID_STARTS_WITH_ARGUMENT, "Type of input argument for starts with predicate must be string");
  810. }
  811. const auto prefix = Execute(Reader.ReadPrefix(item));
  812. if (prefix.IsError()) {
  813. return prefix;
  814. }
  815. bool error = false;
  816. bool found = false;
  817. for (const auto& node : prefix.GetNodes()) {
  818. if (node.IsString()) {
  819. found |= inputString.GetString().StartsWith(node.GetString());
  820. } else {
  821. error = true;
  822. }
  823. if (IsLax() && (found || error)) {
  824. break;
  825. }
  826. }
  827. if (error) {
  828. return TJsonNodes({TValue(MakeEntity())});
  829. }
  830. return TJsonNodes({TValue(MakeBool(found))});
  831. }
  832. TResult TExecutor::IsUnknownPredicate(const TJsonPathItem& item) {
  833. const auto input = Execute(Reader.ReadInput(item));
  834. if (input.IsError()) {
  835. return input;
  836. }
  837. const auto& nodes = input.GetNodes();
  838. if (nodes.size() != 1) {
  839. return MakeError(item, TIssuesIds::JSONPATH_INVALID_IS_UNKNOWN_ARGUMENT, "Expected exactly 1 item as an argument for is unknown predicate");
  840. }
  841. const auto& node = nodes[0];
  842. if (node.IsNull()) {
  843. return TJsonNodes({TValue(MakeBool(true))});
  844. }
  845. if (!node.IsBool()) {
  846. return MakeError(item, TIssuesIds::JSONPATH_INVALID_IS_UNKNOWN_ARGUMENT, "is unknown predicate supports only bool and null types for its argument");
  847. }
  848. return TJsonNodes({TValue(MakeBool(false))});
  849. }
  850. TResult TExecutor::ExistsPredicate(const TJsonPathItem& item) {
  851. const auto input = Execute(Reader.ReadInput(item));
  852. if (input.IsError()) {
  853. return TJsonNodes({TValue(MakeEntity())});
  854. }
  855. const auto& nodes = input.GetNodes();
  856. return TJsonNodes({TValue(MakeBool(!nodes.empty()))});
  857. }
  858. TResult TExecutor::LikeRegexPredicate(const TJsonPathItem& item) {
  859. const auto input = Execute(Reader.ReadInput(item));
  860. if (input.IsError()) {
  861. return input;
  862. }
  863. const auto& regex = item.GetRegex();
  864. bool error = false;
  865. bool found = false;
  866. for (const auto& node : OptionalUnwrapArrays(input.GetNodes())) {
  867. if (node.IsString()) {
  868. found |= regex->Matches(node.GetString());
  869. } else {
  870. error = true;
  871. }
  872. if (IsLax() && (found || error)) {
  873. break;
  874. }
  875. }
  876. if (error) {
  877. return TJsonNodes({TValue(MakeEntity())});
  878. }
  879. return TJsonNodes({TValue(MakeBool(found))});
  880. }
  881. TJsonNodes TExecutor::OptionalUnwrapArrays(const TJsonNodes& input) {
  882. if (IsStrict()) {
  883. return input;
  884. }
  885. TJsonNodes result;
  886. for (const auto& node : input) {
  887. if (!node.IsArray()) {
  888. result.push_back(node);
  889. continue;
  890. }
  891. auto it = node.GetArrayIterator();
  892. TValue value;
  893. while (it.Next(value)) {
  894. result.push_back(value);
  895. }
  896. }
  897. return result;
  898. }
  899. TJsonNodes TExecutor::OptionalArrayWrapNodes(const TJsonNodes& input) {
  900. if (IsStrict()) {
  901. return input;
  902. }
  903. TJsonNodes result;
  904. for (const auto& node : input) {
  905. if (node.IsArray()) {
  906. result.push_back(node);
  907. continue;
  908. }
  909. TUnboxedValue nodeCopy(node.ConvertToUnboxedValue(ValueBuilder));
  910. result.push_back(TValue(MakeList(&nodeCopy, 1, ValueBuilder)));
  911. }
  912. return result;
  913. }
  914. }