123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604 |
- #include "binary.h"
- #include <yql/essentials/utils/yql_panic.h>
- namespace NYql::NJsonPath {
- bool TArraySubscriptOffsets::IsRange() const {
- return ToOffset > 0;
- }
- const TStringBuf TJsonPathItem::GetString() const {
- return std::get<TStringBuf>(Data);
- }
- const TVector<TArraySubscriptOffsets>& TJsonPathItem::GetSubscripts() const {
- return std::get<TVector<TArraySubscriptOffsets>>(Data);
- }
- const TBinaryOpArgumentsOffset& TJsonPathItem::GetBinaryOpArguments() const {
- return std::get<TBinaryOpArgumentsOffset>(Data);
- }
- double TJsonPathItem::GetNumber() const {
- return std::get<double>(Data);
- }
- bool TJsonPathItem::GetBoolean() const {
- return std::get<bool>(Data);
- }
- TFilterPredicateOffset TJsonPathItem::GetFilterPredicateOffset() const {
- return std::get<TFilterPredicateOffset>(Data);
- }
- TStartsWithPrefixOffset TJsonPathItem::GetStartsWithPrefixOffset() const {
- return std::get<TStartsWithPrefixOffset>(Data);
- }
- const NReWrapper::IRePtr& TJsonPathItem::GetRegex() const {
- return std::get<NReWrapper::IRePtr>(Data);
- }
- TJsonPathReader::TJsonPathReader(const TJsonPathPtr path)
- : Path(path)
- , InitialPos(0)
- , Mode(ReadMode(InitialPos))
- {
- }
- const TJsonPathItem& TJsonPathReader::ReadFirst() {
- return ReadFromPos(InitialPos);
- }
- const TJsonPathItem& TJsonPathReader::ReadInput(const TJsonPathItem& item) {
- YQL_ENSURE(item.InputItemOffset.Defined());
- return ReadFromPos(*item.InputItemOffset);
- }
- const TJsonPathItem& TJsonPathReader::ReadFromSubscript(const TArraySubscriptOffsets& subscript) {
- return ReadFromPos(subscript.FromOffset);
- }
- const TJsonPathItem& TJsonPathReader::ReadToSubscript(const TArraySubscriptOffsets& subscript) {
- YQL_ENSURE(subscript.IsRange());
- return ReadFromPos(subscript.ToOffset);
- }
- const TJsonPathItem& TJsonPathReader::ReadLeftOperand(const TJsonPathItem& node) {
- return ReadFromPos(node.GetBinaryOpArguments().LeftOffset);
- }
- const TJsonPathItem& TJsonPathReader::ReadRightOperand(const TJsonPathItem& node) {
- return ReadFromPos(node.GetBinaryOpArguments().RightOffset);
- }
- const TJsonPathItem& TJsonPathReader::ReadFilterPredicate(const TJsonPathItem& node) {
- return ReadFromPos(node.GetFilterPredicateOffset().Offset);
- }
- const TJsonPathItem& TJsonPathReader::ReadPrefix(const TJsonPathItem& node) {
- return ReadFromPos(node.GetStartsWithPrefixOffset().Offset);
- }
- EJsonPathMode TJsonPathReader::GetMode() const {
- return Mode;
- }
- const TJsonPathItem& TJsonPathReader::ReadFromPos(TUint pos) {
- YQL_ENSURE(pos < Path->Size());
- const auto it = ItemCache.find(pos);
- if (it != ItemCache.end()) {
- return it->second;
- }
- TJsonPathItem& result = ItemCache[pos];
- result.Type = ReadType(pos);
- const auto row = ReadUint(pos);
- const auto column = ReadUint(pos);
- result.Pos = TPosition(column, row, "jsonpath");
- switch (result.Type) {
- // Items without input
- case EJsonPathItemType::FilterObject:
- case EJsonPathItemType::NullLiteral:
- case EJsonPathItemType::ContextObject:
- case EJsonPathItemType::LastArrayIndex:
- break;
- case EJsonPathItemType::Variable:
- case EJsonPathItemType::StringLiteral:
- result.Data = ReadString(pos);
- break;
- case EJsonPathItemType::NumberLiteral:
- result.Data = ReadDouble(pos);
- break;
- case EJsonPathItemType::BooleanLiteral:
- result.Data = ReadBool(pos);
- break;
- // Items with single input
- case EJsonPathItemType::TypeMethod:
- case EJsonPathItemType::SizeMethod:
- case EJsonPathItemType::KeyValueMethod:
- case EJsonPathItemType::AbsMethod:
- case EJsonPathItemType::FloorMethod:
- case EJsonPathItemType::CeilingMethod:
- case EJsonPathItemType::DoubleMethod:
- case EJsonPathItemType::WildcardArrayAccess:
- case EJsonPathItemType::WildcardMemberAccess:
- case EJsonPathItemType::UnaryMinus:
- case EJsonPathItemType::UnaryPlus:
- case EJsonPathItemType::UnaryNot:
- case EJsonPathItemType::IsUnknownPredicate:
- case EJsonPathItemType::ExistsPredicate:
- result.InputItemOffset = ReadUint(pos);
- break;
- case EJsonPathItemType::MemberAccess:
- result.Data = ReadString(pos);
- result.InputItemOffset = ReadUint(pos);
- break;
- case EJsonPathItemType::ArrayAccess:
- result.Data = ReadSubscripts(pos);
- result.InputItemOffset = ReadUint(pos);
- break;
- case EJsonPathItemType::FilterPredicate:
- result.Data = TFilterPredicateOffset{ReadUint(pos)};
- result.InputItemOffset = ReadUint(pos);
- break;
- case EJsonPathItemType::StartsWithPredicate:
- result.Data = TStartsWithPrefixOffset{ReadUint(pos)};
- result.InputItemOffset = ReadUint(pos);
- break;
- case EJsonPathItemType::LikeRegexPredicate: {
- const auto serializedRegex = ReadString(pos);
- auto regex = NReWrapper::NDispatcher::Deserialize(serializedRegex);
- result.Data = std::move(regex);
- result.InputItemOffset = ReadUint(pos);
- break;
- }
- // Items with 2 inputs
- case EJsonPathItemType::BinaryAdd:
- case EJsonPathItemType::BinarySubstract:
- case EJsonPathItemType::BinaryMultiply:
- case EJsonPathItemType::BinaryDivide:
- case EJsonPathItemType::BinaryModulo:
- case EJsonPathItemType::BinaryLess:
- case EJsonPathItemType::BinaryLessEqual:
- case EJsonPathItemType::BinaryGreater:
- case EJsonPathItemType::BinaryGreaterEqual:
- case EJsonPathItemType::BinaryEqual:
- case EJsonPathItemType::BinaryNotEqual:
- case EJsonPathItemType::BinaryAnd:
- case EJsonPathItemType::BinaryOr:
- TBinaryOpArgumentsOffset data;
- data.LeftOffset = ReadUint(pos);
- data.RightOffset = ReadUint(pos);
- result.Data = data;
- break;
- }
- return result;
- }
- TUint TJsonPathReader::ReadUint(TUint& pos) {
- return ReadPOD<TUint>(pos);
- }
- double TJsonPathReader::ReadDouble(TUint& pos) {
- return ReadPOD<double>(pos);
- }
- bool TJsonPathReader::ReadBool(TUint& pos) {
- return ReadPOD<bool>(pos);
- }
- EJsonPathItemType TJsonPathReader::ReadType(TUint& pos) {
- return static_cast<EJsonPathItemType>(ReadUint(pos));
- }
- EJsonPathMode TJsonPathReader::ReadMode(TUint& pos) {
- return static_cast<EJsonPathMode>(ReadUint(pos));
- }
- const TStringBuf TJsonPathReader::ReadString(TUint& pos) {
- TUint length = ReadUint(pos);
- TStringBuf result(Path->Begin() + pos, length);
- pos += length;
- return result;
- }
- TVector<TArraySubscriptOffsets> TJsonPathReader::ReadSubscripts(TUint& pos) {
- const auto count = ReadUint(pos);
- TVector<TArraySubscriptOffsets> result(count);
- for (size_t i = 0; i < count; i++) {
- result[i].FromOffset = ReadUint(pos);
- result[i].ToOffset = ReadUint(pos);
- }
- return result;
- }
- void TJsonPathBuilder::VisitRoot(const TRootNode& node) {
- // Block structure:
- // <(1) TUint>
- // Components:
- // (1) Must be casted to EJsonPathMode. Jsonpath execution mode
- WriteMode(node.GetMode());
- node.GetExpr()->Accept(*this);
- }
- void TJsonPathBuilder::VisitContextObject(const TContextObjectNode& node) {
- WriteZeroInputItem(EJsonPathItemType::ContextObject, node);
- }
- void TJsonPathBuilder::VisitVariable(const TVariableNode& node) {
- WriteZeroInputItem(EJsonPathItemType::Variable, node);
- WriteString(node.GetName());
- }
- void TJsonPathBuilder::VisitLastArrayIndex(const TLastArrayIndexNode& node) {
- WriteZeroInputItem(EJsonPathItemType::LastArrayIndex, node);
- }
- void TJsonPathBuilder::VisitNumberLiteral(const TNumberLiteralNode& node) {
- WriteZeroInputItem(EJsonPathItemType::NumberLiteral, node);
- WriteDouble(node.GetValue());
- }
- void TJsonPathBuilder::VisitMemberAccess(const TMemberAccessNode& node) {
- // Block structure:
- // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) char[]> <(6) TUint>
- // Components:
- // (1) Must be casted to EJsonPathItemType. Member access item type
- // (2) Row of the position in the source jsonpath
- // (3) Column of the position in the source jsonpath
- // (4) Length of member name string
- // (5) Member name string
- // (6) Offset of the input item
- WriteType(EJsonPathItemType::MemberAccess);
- WritePos(node);
- WriteString(node.GetMember());
- WriteNextPosition();
- node.GetInput()->Accept(*this);
- }
- void TJsonPathBuilder::VisitWildcardMemberAccess(const TWildcardMemberAccessNode& node) {
- WriteSingleInputItem(EJsonPathItemType::WildcardMemberAccess, node, node.GetInput());
- }
- void TJsonPathBuilder::VisitArrayAccess(const TArrayAccessNode& node) {
- // Block structure:
- // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) pair<TUint, TUint>[]> <(6) TUint> <(7) items>
- // Components:
- // (1) Must be casted to EJsonPathItemType. Array access item type
- // (2) Row of the position in the source jsonpath
- // (3) Column of the position in the source jsonpath
- // (4) Count of subscripts stored
- // (5) Array of pairs with offsets to subscript items. If subscript is a single index, only first element
- // is set to it's offset and second is zero. If subscript is a range, both pair elements are valid offsets
- // to the elements of range (lower and upper bound).
- // (6) Offset of the input item
- // (7) Array of subcsripts. For details about encoding see VisitArraySubscript
- WriteType(EJsonPathItemType::ArrayAccess);
- WritePos(node);
- // (4) Write count of subscripts stored
- const auto& subscripts = node.GetSubscripts();
- const auto count = subscripts.size();
- WriteUint(count);
- // (5) We do not know sizes of each subscript. Write array of zeros for offsets
- const auto indexStart = CurrentEndPos();
- TVector<TUint> offsets(2 * count);
- WriteUintSequence(offsets);
- // (6) Reserve space for input offset to rewrite it later
- const auto inputStart = CurrentEndPos();
- WriteFinishPosition();
- // (7) Write all subscripts and record offset for each of them
- for (size_t i = 0; i < count; i++) {
- offsets[2 * i] = CurrentEndPos();
- subscripts[i].From->Accept(*this);
- if (subscripts[i].To) {
- offsets[2 * i + 1] = CurrentEndPos();
- subscripts[i].To->Accept(*this);
- }
- }
- // (5) Rewrite offsets with correct values
- RewriteUintSequence(offsets, indexStart);
- // (6) Rewrite input offset
- RewriteUint(CurrentEndPos(), inputStart);
- node.GetInput()->Accept(*this);
- }
- void TJsonPathBuilder::VisitWildcardArrayAccess(const TWildcardArrayAccessNode& node) {
- WriteSingleInputItem(EJsonPathItemType::WildcardArrayAccess, node, node.GetInput());
- }
- void TJsonPathBuilder::VisitUnaryOperation(const TUnaryOperationNode& node) {
- EJsonPathItemType type;
- switch (node.GetOp()) {
- case EUnaryOperation::Plus:
- type = EJsonPathItemType::UnaryPlus;
- break;
- case EUnaryOperation::Minus:
- type = EJsonPathItemType::UnaryMinus;
- break;
- case EUnaryOperation::Not:
- type = EJsonPathItemType::UnaryNot;
- break;
- }
- WriteSingleInputItem(type, node, node.GetExpr());
- }
- void TJsonPathBuilder::VisitBinaryOperation(const TBinaryOperationNode& node) {
- EJsonPathItemType type;
- switch (node.GetOp()) {
- case EBinaryOperation::Add:
- type = EJsonPathItemType::BinaryAdd;
- break;
- case EBinaryOperation::Substract:
- type = EJsonPathItemType::BinarySubstract;
- break;
- case EBinaryOperation::Multiply:
- type = EJsonPathItemType::BinaryMultiply;
- break;
- case EBinaryOperation::Divide:
- type = EJsonPathItemType::BinaryDivide;
- break;
- case EBinaryOperation::Modulo:
- type = EJsonPathItemType::BinaryModulo;
- break;
- case EBinaryOperation::Less:
- type = EJsonPathItemType::BinaryLess;
- break;
- case EBinaryOperation::LessEqual:
- type = EJsonPathItemType::BinaryLessEqual;
- break;
- case EBinaryOperation::Greater:
- type = EJsonPathItemType::BinaryGreater;
- break;
- case EBinaryOperation::GreaterEqual:
- type = EJsonPathItemType::BinaryGreaterEqual;
- break;
- case EBinaryOperation::Equal:
- type = EJsonPathItemType::BinaryEqual;
- break;
- case EBinaryOperation::NotEqual:
- type = EJsonPathItemType::BinaryNotEqual;
- break;
- case EBinaryOperation::And:
- type = EJsonPathItemType::BinaryAnd;
- break;
- case EBinaryOperation::Or:
- type = EJsonPathItemType::BinaryOr;
- break;
- }
- WriteTwoInputsItem(type, node, node.GetLeftExpr(), node.GetRightExpr());
- }
- void TJsonPathBuilder::VisitBooleanLiteral(const TBooleanLiteralNode& node) {
- WriteZeroInputItem(EJsonPathItemType::BooleanLiteral, node);
- WriteBool(node.GetValue());
- }
- void TJsonPathBuilder::VisitNullLiteral(const TNullLiteralNode& node) {
- WriteZeroInputItem(EJsonPathItemType::NullLiteral, node);
- }
- void TJsonPathBuilder::VisitStringLiteral(const TStringLiteralNode& node) {
- WriteZeroInputItem(EJsonPathItemType::StringLiteral, node);
- WriteString(node.GetValue());
- }
- void TJsonPathBuilder::VisitFilterObject(const TFilterObjectNode& node) {
- WriteZeroInputItem(EJsonPathItemType::FilterObject, node);
- }
- void TJsonPathBuilder::VisitFilterPredicate(const TFilterPredicateNode& node) {
- WriteTwoInputsItem(EJsonPathItemType::FilterPredicate, node, node.GetPredicate(), node.GetInput());
- }
- void TJsonPathBuilder::VisitMethodCall(const TMethodCallNode& node) {
- EJsonPathItemType type;
- switch (node.GetType()) {
- case EMethodType::Abs:
- type = EJsonPathItemType::AbsMethod;
- break;
- case EMethodType::Floor:
- type = EJsonPathItemType::FloorMethod;
- break;
- case EMethodType::Ceiling:
- type = EJsonPathItemType::CeilingMethod;
- break;
- case EMethodType::Double:
- type = EJsonPathItemType::DoubleMethod;
- break;
- case EMethodType::Type:
- type = EJsonPathItemType::TypeMethod;
- break;
- case EMethodType::Size:
- type = EJsonPathItemType::SizeMethod;
- break;
- case EMethodType::KeyValue:
- type = EJsonPathItemType::KeyValueMethod;
- break;
- }
- WriteSingleInputItem(type, node, node.GetInput());
- }
- TJsonPathPtr TJsonPathBuilder::ShrinkAndGetResult() {
- Result->ShrinkToFit();
- return Result;
- }
- void TJsonPathBuilder::VisitStartsWithPredicate(const TStartsWithPredicateNode& node) {
- WriteTwoInputsItem(EJsonPathItemType::StartsWithPredicate, node, node.GetPrefix(), node.GetInput());
- }
- void TJsonPathBuilder::VisitExistsPredicate(const TExistsPredicateNode& node) {
- WriteSingleInputItem(EJsonPathItemType::ExistsPredicate, node, node.GetInput());
- }
- void TJsonPathBuilder::VisitIsUnknownPredicate(const TIsUnknownPredicateNode& node) {
- WriteSingleInputItem(EJsonPathItemType::IsUnknownPredicate, node, node.GetInput());
- }
- void TJsonPathBuilder::VisitLikeRegexPredicate(const TLikeRegexPredicateNode& node) {
- // Block structure:
- // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) char[]> <(6) TUint>
- // Components:
- // (1) Must be casted to EJsonPathItemType. Member access item type
- // (2) Row of the position in the source jsonpath
- // (3) Column of the position in the source jsonpath
- // (4) Length of serialized Hyperscan database
- // (5) Serialized Hyperscan database
- // (6) Offset of the input item
- WriteType(EJsonPathItemType::LikeRegexPredicate);
- WritePos(node);
- const TString serializedRegex = node.GetRegex()->Serialize();
- WriteString(serializedRegex);
- WriteNextPosition();
- node.GetInput()->Accept(*this);
- }
- void TJsonPathBuilder::WriteZeroInputItem(EJsonPathItemType type, const TAstNode& node) {
- // Block structure:
- // <(1) TUint> <(2) TUint> <(3) TUint>
- // Components:
- // (1) Item type
- // (2) Row of the position in the source jsonpath
- // (3) Column of the position in the source jsonpath
- WriteType(type);
- WritePos(node);
- }
- void TJsonPathBuilder::WriteSingleInputItem(EJsonPathItemType type, const TAstNode& node, const TAstNodePtr input) {
- // Block structure:
- // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) item>
- // Components:
- // (1) Item type
- // (2) Row of the position in the source jsonpath
- // (3) Column of the position in the source jsonpath
- // (4) Offset of the input item
- // (5) Input item
- WriteZeroInputItem(type, node);
- WriteNextPosition();
- input->Accept(*this);
- }
- void TJsonPathBuilder::WriteTwoInputsItem(EJsonPathItemType type, const TAstNode& node, const TAstNodePtr firstInput, const TAstNodePtr secondInput) {
- // Block structure:
- // <(1) TUint> <(2) TUint> <(3) TUint> <(4) TUint> <(5) TUint> <(6) item> <(7) item>
- // Components:
- // (1) Item type
- // (2) Row of the position in the source jsonpath
- // (3) Column of the position in the source jsonpath
- // (4) Offset of the first input
- // (5) Offset of the second input
- // (6) JsonPath item representing first input
- // (7) JsonPath item representing right input
- WriteZeroInputItem(type, node);
- // (4) and (5) Fill offsets with zeros
- const auto indexStart = CurrentEndPos();
- WriteUint(0);
- WriteUint(0);
- // (6) Write first input and record it's offset
- const auto firstInputStart = CurrentEndPos();
- firstInput->Accept(*this);
- // (7) Write second input and record it's offset
- const auto secondInputStart = CurrentEndPos();
- secondInput->Accept(*this);
- // (4) and (5) Rewrite offsets with correct values
- RewriteUintSequence({firstInputStart, secondInputStart}, indexStart);
- }
- void TJsonPathBuilder::WritePos(const TAstNode& node) {
- WriteUint(node.GetPos().Row);
- WriteUint(node.GetPos().Column);
- }
- void TJsonPathBuilder::WriteType(EJsonPathItemType type) {
- WriteUint(static_cast<TUint>(type));
- }
- void TJsonPathBuilder::WriteMode(EJsonPathMode mode) {
- WriteUint(static_cast<TUint>(mode));
- }
- void TJsonPathBuilder::WriteNextPosition() {
- WriteUint(CurrentEndPos() + sizeof(TUint));
- }
- void TJsonPathBuilder::WriteFinishPosition() {
- WriteUint(0);
- }
- void TJsonPathBuilder::WriteString(TStringBuf value) {
- WriteUint(value.size());
- Result->Append(value.data(), value.size());
- }
- void TJsonPathBuilder::RewriteUintSequence(const TVector<TUint>& sequence, TUint offset) {
- const auto length = sequence.size() * sizeof(TUint);
- Y_ASSERT(offset + length < CurrentEndPos());
- MemCopy(Result->Data() + offset, reinterpret_cast<const char*>(sequence.data()), length);
- }
- void TJsonPathBuilder::WriteUintSequence(const TVector<TUint>& sequence) {
- const auto length = sequence.size() * sizeof(TUint);
- Result->Append(reinterpret_cast<const char*>(sequence.data()), length);
- }
- void TJsonPathBuilder::RewriteUint(TUint value, TUint offset) {
- Y_ASSERT(offset + sizeof(TUint) < CurrentEndPos());
- MemCopy(Result->Data() + offset, reinterpret_cast<const char*>(&value), sizeof(TUint));
- }
- void TJsonPathBuilder::WriteUint(TUint value) {
- WritePOD(value);
- }
- void TJsonPathBuilder::WriteDouble(double value) {
- WritePOD(value);
- }
- void TJsonPathBuilder::WriteBool(bool value) {
- WritePOD(value);
- }
- TUint TJsonPathBuilder::CurrentEndPos() const {
- return Result->Size();
- }
- }
|