hyperscan.cpp 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. #include <yql/essentials/minikql/jsonpath/rewrapper/re.h>
  2. #include <yql/essentials/minikql/jsonpath/rewrapper/registrator.h>
  3. #include <yql/essentials/minikql/jsonpath/rewrapper/proto/serialization.pb.h>
  4. #include <library/cpp/regex/hyperscan/hyperscan.h>
  5. #include <util/charset/utf8.h>
  6. namespace NReWrapper {
  7. namespace NHyperscan {
  8. namespace {
  9. class THyperscan : public IRe {
  10. public:
  11. THyperscan(::NHyperscan::TDatabase&& db)
  12. : Database(std::move(db))
  13. { }
  14. bool Matches(const TStringBuf& text) const override {
  15. if (!Scratch) {
  16. Scratch = ::NHyperscan::MakeScratch(Database);
  17. }
  18. return ::NHyperscan::Matches(Database, Scratch, text);
  19. }
  20. TString Serialize() const override {
  21. // Compatibility with old versions
  22. return ::NHyperscan::Serialize(Database);
  23. /*
  24. * TSerialization proto;
  25. * proto.SetHyperscan(::NHyperscan::Serialize(Database));
  26. * TString data;
  27. * auto res = proto.SerializeToString(&data);
  28. * Y_ABORT_UNLESS(res);
  29. * return data;
  30. */
  31. }
  32. private:
  33. ::NHyperscan::TDatabase Database;
  34. mutable ::NHyperscan::TScratch Scratch;
  35. };
  36. }
  37. IRePtr Compile(const TStringBuf& regex, unsigned int flags) {
  38. unsigned int hyperscanFlags = 0;
  39. try {
  40. if (UTF8Detect(regex)) {
  41. hyperscanFlags |= HS_FLAG_UTF8;
  42. }
  43. if (NX86::HaveAVX2()) {
  44. hyperscanFlags |= HS_CPU_FEATURES_AVX2;
  45. }
  46. if (flags & FLAGS_CASELESS) {
  47. hyperscanFlags |= HS_FLAG_CASELESS;
  48. }
  49. return std::make_unique<THyperscan>(::NHyperscan::Compile(regex, hyperscanFlags));
  50. } catch (const ::NHyperscan::TCompileException& ex) {
  51. ythrow TCompileException() << ex.what();
  52. }
  53. }
  54. IRePtr Deserialize(const TSerialization& proto) {
  55. return std::make_unique<THyperscan>(::NHyperscan::Deserialize(proto.GetHyperscan()));
  56. }
  57. REGISTER_RE_LIB(TSerialization::kHyperscan, Compile, Deserialize)
  58. }
  59. }