binary.h 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. #pragma once
  2. #include "ast_nodes.h"
  3. #include <yql/essentials/minikql/jsonpath/rewrapper/re.h>
  4. #include <util/system/unaligned_mem.h>
  5. #include <util/generic/buffer.h>
  6. #include <util/generic/ptr.h>
  7. #include <util/generic/maybe.h>
  8. #include <util/generic/hash.h>
  9. #include <variant>
  10. #include <type_traits>
  11. namespace NYql::NJsonPath {
  12. class TJsonPath : public TSimpleRefCount<TJsonPath>, public TBuffer {
  13. };
  14. using TJsonPathPtr = TIntrusivePtr<TJsonPath>;
  15. using TUint = ui64;
  // Kind of a single jsonpath item.
  //
  // Every enumerator has an explicitly assigned value.
  // NOTE(review): these values are presumably what WriteType/ReadType store in
  // the binary form, so existing numbers should not be changed — confirm
  // against the implementation before renumbering.
  enum class EJsonPathItemType {
      // Accessors and context.
      MemberAccess = 0,
      WildcardMemberAccess = 1,
      ArrayAccess = 2,
      WildcardArrayAccess = 3,
      ContextObject = 4,
      NumberLiteral = 5,
      LastArrayIndex = 6,

      // Unary and binary arithmetic.
      UnaryPlus = 7,
      UnaryMinus = 8,
      BinaryAdd = 9,
      BinarySubstract = 10, // spelling kept as is: this is an existing public name
      BinaryMultiply = 11,
      BinaryDivide = 12,
      BinaryModulo = 13,

      Variable = 14,

      // Comparisons.
      BinaryLess = 15,
      BinaryLessEqual = 16,
      BinaryGreater = 17,
      BinaryGreaterEqual = 18,
      BinaryEqual = 19,
      BinaryNotEqual = 20,

      // Logical operators.
      BinaryAnd = 21,
      BinaryOr = 22,
      UnaryNot = 23,

      // Remaining literals.
      BooleanLiteral = 24,
      NullLiteral = 25,
      StringLiteral = 26,

      // Filters.
      FilterObject = 27,
      FilterPredicate = 28,

      // Item methods.
      AbsMethod = 29,
      FloorMethod = 30,
      CeilingMethod = 31,
      DoubleMethod = 32,
      TypeMethod = 33,
      SizeMethod = 34,
      KeyValueMethod = 35,

      // Predicates.
      StartsWithPredicate = 36,
      ExistsPredicate = 37,
      IsUnknownPredicate = 38,
      LikeRegexPredicate = 39,
  };
  58. struct TArraySubscriptOffsets {
  59. TUint FromOffset = 0;
  60. TUint ToOffset = 0;
  61. bool IsRange() const;
  62. };
  63. struct TBinaryOpArgumentsOffset {
  64. TUint LeftOffset = 0;
  65. TUint RightOffset = 0;
  66. };
  67. struct TFilterPredicateOffset {
  68. TUint Offset = 0;
  69. };
  70. struct TStartsWithPrefixOffset {
  71. TUint Offset = 0;
  72. };
  73. struct TJsonPathItem {
  74. // Position in the source jsonpath
  75. TPosition Pos;
  76. // Type of item
  77. EJsonPathItemType Type;
  78. // Offset in buffer pointing to the input item
  79. TMaybe<TUint> InputItemOffset;
  80. // Data associated with this item. To determine which variant
  81. // type was filled callee must examine Type field.
  82. // WARNING: Some item types do not fill Data field at all! You must
  83. // check item type before accesing this field.
  84. std::variant<
  85. TStringBuf,
  86. TVector<TArraySubscriptOffsets>,
  87. TBinaryOpArgumentsOffset,
  88. TFilterPredicateOffset,
  89. TStartsWithPrefixOffset,
  90. NReWrapper::IRePtr,
  91. double,
  92. bool
  93. > Data;
  94. const TStringBuf GetString() const;
  95. const TVector<TArraySubscriptOffsets>& GetSubscripts() const;
  96. const TBinaryOpArgumentsOffset& GetBinaryOpArguments() const;
  97. const NReWrapper::IRePtr& GetRegex() const;
  98. double GetNumber() const;
  99. bool GetBoolean() const;
  100. TFilterPredicateOffset GetFilterPredicateOffset() const;
  101. TStartsWithPrefixOffset GetStartsWithPrefixOffset() const;
  102. // Pointer to the binary representation of jsonpath.
  103. // We do not use this directly but Data field can reference to it.
  104. // For example if this item is a string then Data contains TStringBuf
  105. // pointing to some part inside buffer. We must ensure that it is not
  106. // destructed while this item is alive so we keep shared pointer to it.
  107. const TJsonPathPtr JsonPath;
  108. };
  109. class TJsonPathBuilder : public IAstNodeVisitor {
  110. public:
  111. TJsonPathBuilder()
  112. : Result(new TJsonPath())
  113. {
  114. }
  115. void VisitRoot(const TRootNode& node) override;
  116. void VisitContextObject(const TContextObjectNode& node) override;
  117. void VisitVariable(const TVariableNode& node) override;
  118. void VisitLastArrayIndex(const TLastArrayIndexNode& node) override;
  119. void VisitNumberLiteral(const TNumberLiteralNode& node) override;
  120. void VisitMemberAccess(const TMemberAccessNode& node) override;
  121. void VisitWildcardMemberAccess(const TWildcardMemberAccessNode& node) override;
  122. void VisitArrayAccess(const TArrayAccessNode& node) override;
  123. void VisitWildcardArrayAccess(const TWildcardArrayAccessNode& node) override;
  124. void VisitUnaryOperation(const TUnaryOperationNode& node) override;
  125. void VisitBinaryOperation(const TBinaryOperationNode& node) override;
  126. void VisitBooleanLiteral(const TBooleanLiteralNode& node) override;
  127. void VisitNullLiteral(const TNullLiteralNode& node) override;
  128. void VisitStringLiteral(const TStringLiteralNode& node) override;
  129. void VisitFilterObject(const TFilterObjectNode& node) override;
  130. void VisitFilterPredicate(const TFilterPredicateNode& node) override;
  131. void VisitMethodCall(const TMethodCallNode& node) override;
  132. void VisitStartsWithPredicate(const TStartsWithPredicateNode& node) override;
  133. void VisitExistsPredicate(const TExistsPredicateNode& node) override;
  134. void VisitIsUnknownPredicate(const TIsUnknownPredicateNode& node) override;
  135. void VisitLikeRegexPredicate(const TLikeRegexPredicateNode& node) override;
  136. TJsonPathPtr ShrinkAndGetResult();
  137. private:
  138. void WriteZeroInputItem(EJsonPathItemType type, const TAstNode& node);
  139. void WriteSingleInputItem(EJsonPathItemType type, const TAstNode& node, const TAstNodePtr input);
  140. void WriteTwoInputsItem(EJsonPathItemType type, const TAstNode& node, const TAstNodePtr firstInput, const TAstNodePtr secondInput);
  141. void WritePos(const TAstNode& node);
  142. void WriteType(EJsonPathItemType type);
  143. void WriteMode(EJsonPathMode mode);
  144. void WriteNextPosition();
  145. void WriteFinishPosition();
  146. void WriteString(TStringBuf value);
  147. void RewriteUintSequence(const TVector<TUint>& sequence, TUint offset);
  148. void WriteUintSequence(const TVector<TUint>& sequence);
  149. void RewriteUint(TUint value, TUint offset);
  150. void WriteUint(TUint value);
  151. void WriteDouble(double value);
  152. void WriteBool(bool value);
  153. template <typename T>
  154. void WritePOD(const T& value) {
  155. static_assert(std::is_pod_v<T>, "Type must be POD");
  156. Result->Append(reinterpret_cast<const char*>(&value), sizeof(T));
  157. }
  158. TUint CurrentEndPos() const;
  159. TJsonPathPtr Result;
  160. };
  161. class TJsonPathReader {
  162. public:
  163. TJsonPathReader(const TJsonPathPtr path);
  164. const TJsonPathItem& ReadFirst();
  165. const TJsonPathItem& ReadInput(const TJsonPathItem& node);
  166. const TJsonPathItem& ReadFromSubscript(const TArraySubscriptOffsets& subscript);
  167. const TJsonPathItem& ReadToSubscript(const TArraySubscriptOffsets& subscript);
  168. const TJsonPathItem& ReadLeftOperand(const TJsonPathItem& node);
  169. const TJsonPathItem& ReadRightOperand(const TJsonPathItem& node);
  170. const TJsonPathItem& ReadFilterPredicate(const TJsonPathItem& node);
  171. const TJsonPathItem& ReadPrefix(const TJsonPathItem& node);
  172. EJsonPathMode GetMode() const;
  173. private:
  174. const TJsonPathItem& ReadFromPos(TUint pos);
  175. TUint ReadUint(TUint& pos);
  176. double ReadDouble(TUint& pos);
  177. bool ReadBool(TUint& pos);
  178. EJsonPathItemType ReadType(TUint& pos);
  179. EJsonPathMode ReadMode(TUint& pos);
  180. const TStringBuf ReadString(TUint& pos);
  181. TVector<TArraySubscriptOffsets> ReadSubscripts(TUint& pos);
  182. template <typename T>
  183. T ReadPOD(TUint& pos) {
  184. static_assert(std::is_pod_v<T>, "Type must be POD");
  185. T value = ReadUnaligned<T>(Path->Begin() + pos);
  186. pos += sizeof(T);
  187. return std::move(value);
  188. }
  189. const TJsonPathPtr Path;
  190. TUint InitialPos;
  191. EJsonPathMode Mode;
  192. THashMap<TUint, TJsonPathItem> ItemCache;
  193. };
  194. }