hyperscan.h 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. #pragma once
  2. #include <contrib/libs/hyperscan/src/hs.h>
  3. #include <util/generic/ptr.h>
  4. #include <util/generic/strbuf.h>
  5. #include <util/generic/vector.h>
  6. #include <util/generic/yexception.h>
  7. #include <util/system/cpu_id.h>
  8. namespace NHyperscan {
  9. using TCPUFeatures = decltype(hs_platform_info_t::cpu_features);
  10. constexpr TCPUFeatures CPU_FEATURES_AVX2 = HS_CPU_FEATURES_AVX2;
  11. constexpr TCPUFeatures CPU_FEATURES_AVX512 = HS_CPU_FEATURES_AVX512 | HS_CPU_FEATURES_AVX2;
  12. template<typename TNativeDeleter, TNativeDeleter NativeDeleter>
  13. class TDeleter {
  14. public:
  15. template<typename T>
  16. static void Destroy(T* ptr) {
  17. NativeDeleter(ptr);
  18. }
  19. };
  20. using TDatabase = THolder<hs_database_t, TDeleter<decltype(&hs_free_database), &hs_free_database>>;
  21. using TScratch = THolder<hs_scratch_t, TDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>;
  22. class TCompileException : public yexception {
  23. };
  24. namespace NPrivate {
  // Runtime variants a TImpl can be constructed for.
  // The numeric values are spelled out explicitly and must stay as they are.
  enum class ERuntime {
      Core2  = 0,
      Corei7 = 1,
      AVX2   = 2,
      AVX512 = 3,
  };
  31. ERuntime DetectCurrentRuntime();
  32. TCPUFeatures RuntimeCpuFeatures(ERuntime runtime);
  33. hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures);
  34. struct TImpl {
  35. hs_error_t (*AllocScratch)(const hs_database_t* db, hs_scratch_t** scratch);
  36. hs_error_t (*Scan)(const hs_database_t* db, const char* data,
  37. unsigned length, unsigned flags, hs_scratch_t* scratch,
  38. match_event_handler onEvent, void* userCtx);
  39. hs_error_t (*SerializeDatabase)(const hs_database_t* db, char** bytes, size_t* serialized_length);
  40. hs_error_t (*DeserializeDatabase)(const char* bytes, size_t length, hs_database_t** info);
  41. TImpl() : TImpl(DetectCurrentRuntime()) {}
  42. explicit TImpl(ERuntime runtime);
  43. };
  44. TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform);
  45. TDatabase CompileMulti(
  46. const TVector<const char*>& regexs,
  47. const TVector<unsigned int>& flags,
  48. const TVector<unsigned int>& ids,
  49. hs_platform_info_t* platform,
  50. const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
  51. // We need to parametrize Scan and Matches functions for testing purposes
  52. template<typename TCallback>
  53. void Scan(
  54. const TDatabase& db,
  55. const TScratch& scratch,
  56. const TStringBuf& text,
  57. TCallback& callback, // applied to index of matched regex
  58. const TImpl& impl
  59. ) {
  60. struct TCallbackWrapper {
  61. static int EventHandler(
  62. unsigned int id,
  63. unsigned long long from,
  64. unsigned long long to,
  65. unsigned int flags,
  66. void* ctx) {
  67. Y_UNUSED(flags);
  68. TCallback& callback2 = *reinterpret_cast<TCallback*>(ctx);
  69. if constexpr (std::is_same_v<int, std::invoke_result_t<TCallback, unsigned int, unsigned long long, unsigned long long>>) {
  70. return callback2(id, from, to);
  71. } else {
  72. callback2(id, from, to);
  73. return 0;
  74. }
  75. }
  76. };
  77. unsigned int flags = 0; // unused at present
  78. hs_error_t status = impl.Scan(
  79. db.Get(),
  80. text.begin(),
  81. text.size(),
  82. flags,
  83. scratch.Get(),
  84. &TCallbackWrapper::EventHandler,
  85. &callback);
  86. if (status != HS_SUCCESS && status != HS_SCAN_TERMINATED) {
  87. ythrow yexception() << "Failed to scan against text: " << text;
  88. }
  89. }
  90. bool Matches(
  91. const TDatabase& db,
  92. const TScratch& scratch,
  93. const TStringBuf& text,
  94. const TImpl& impl);
  95. }
  96. TDatabase Compile(const TStringBuf& regex, unsigned int flags);
  97. TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures);
  98. TDatabase CompileMulti(
  99. const TVector<const char*>& regexs,
  100. const TVector<unsigned int>& flags,
  101. const TVector<unsigned int>& ids,
  102. const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
  103. TDatabase CompileMulti(
  104. const TVector<const char*>& regexs,
  105. const TVector<unsigned int>& flags,
  106. const TVector<unsigned int>& ids,
  107. TCPUFeatures cpuFeatures,
  108. const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
  109. TScratch MakeScratch(const TDatabase& db);
  110. void GrowScratch(TScratch& scratch, const TDatabase& db);
  111. TScratch CloneScratch(const TScratch& scratch);
  112. template<typename TCallback>
  113. void Scan(
  114. const TDatabase& db,
  115. const TScratch& scratch,
  116. const TStringBuf& text,
  117. TCallback& callback // applied to index of matched regex
  118. ) {
  119. NPrivate::Scan<TCallback>(db, scratch, text, callback, *Singleton<NPrivate::TImpl>());
  120. }
  121. bool Matches(
  122. const TDatabase& db,
  123. const TScratch& scratch,
  124. const TStringBuf& text);
  125. TString Serialize(const TDatabase& db);
  126. TDatabase Deserialize(const TStringBuf& serialization);
  127. }