re2.cpp 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. #include <yql/essentials/minikql/jsonpath/rewrapper/re.h>
  2. #include <yql/essentials/minikql/jsonpath/rewrapper/registrator.h>
  3. #include <yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.pb.h>
  4. #include <contrib/libs/re2/re2/re2.h>
  5. #include <util/charset/utf8.h>
  6. namespace NReWrapper {
  7. using namespace re2;
  8. namespace NRe2 {
  9. namespace {
  10. RE2::Options CreateOptions(const TStringBuf& regex, unsigned int flags) {
  11. RE2::Options options;
  12. bool needUtf8 = (UTF8Detect(regex) == UTF8);
  13. options.set_encoding(
  14. needUtf8
  15. ? RE2::Options::Encoding::EncodingUTF8
  16. : RE2::Options::Encoding::EncodingLatin1
  17. );
  18. options.set_case_sensitive(!(flags & FLAGS_CASELESS));
  19. return options;
  20. }
  21. class TRe2 : public IRe {
  22. public:
  23. TRe2(const TStringBuf& regex, unsigned int flags)
  24. : Regexp(StringPiece(regex.data(), regex.size()), CreateOptions(regex, flags))
  25. {
  26. auto re2 = RawRegexp.MutableRe2();
  27. re2->set_regexp(TString(regex));
  28. re2->set_flags(flags);
  29. }
  30. TRe2(const TSerialization& proto)
  31. : Regexp(StringPiece(proto.GetRe2().GetRegexp().data(), proto.GetRe2().GetRegexp().size()),
  32. CreateOptions(proto.GetRe2().GetRegexp(), proto.GetRe2().GetFlags()))
  33. , RawRegexp(proto)
  34. { }
  35. bool Matches(const TStringBuf& text) const override {
  36. const StringPiece piece(text.data(), text.size());
  37. RE2::Anchor anchor = RE2::UNANCHORED;
  38. return Regexp.Match(piece, 0, text.size(), anchor, nullptr, 0);
  39. }
  40. TString Serialize() const override {
  41. TString data;
  42. auto res = RawRegexp.SerializeToString(&data);
  43. Y_ABORT_UNLESS(res);
  44. return data;
  45. }
  46. bool Ok(TString* error) const {
  47. if (Regexp.ok()) {
  48. return true;
  49. } else {
  50. *error = Regexp.error();
  51. return false;
  52. }
  53. }
  54. private:
  55. RE2 Regexp;
  56. TSerialization RawRegexp;
  57. };
  58. }
  59. IRePtr Compile(const TStringBuf& regex, unsigned int flags) {
  60. auto ptr = std::make_unique<TRe2>(regex, flags);
  61. TString error;
  62. if (!ptr->Ok(&error)) {
  63. ythrow TCompileException() << error;
  64. }
  65. return ptr;
  66. }
  67. IRePtr Deserialize(const TSerialization& p) {
  68. return std::make_unique<TRe2>(p);
  69. }
  70. REGISTER_RE_LIB(TSerialization::kRe2, Compile, Deserialize)
  71. }
  72. }