// hyperscan.h (6.3 KB)
  1. #pragma once
  2. #include <contrib/libs/hyperscan/src/hs.h>
  3. #include <util/generic/ptr.h>
  4. #include <util/generic/strbuf.h>
  5. #include <util/generic/vector.h>
  6. #include <util/generic/yexception.h>
  7. #include <util/system/cpu_id.h>
  8. namespace NHyperscan {
  9. using TCPUFeatures = decltype(hs_platform_info_t::cpu_features);
  10. constexpr TCPUFeatures CPU_FEATURES_AVX2 = HS_CPU_FEATURES_AVX2;
  11. template<typename TNativeDeleter, TNativeDeleter NativeDeleter>
  12. class TDeleter {
  13. public:
  14. template<typename T>
  15. static void Destroy(T* ptr) {
  16. NativeDeleter(ptr);
  17. }
  18. };
  19. using TDatabase = THolder<hs_database_t, TDeleter<decltype(&hs_free_database), &hs_free_database>>;
  20. using TScratch = THolder<hs_scratch_t, TDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>;
  21. class TCompileException : public yexception {
  22. };
  23. namespace NPrivate {
  24. enum class ERuntime {
  25. Core2 = 0,
  26. Corei7 = 1,
  27. AVX2 = 2,
  28. };
  29. ERuntime DetectCurrentRuntime();
  30. TCPUFeatures RuntimeCpuFeatures(ERuntime runtime);
  31. hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures);
  32. struct TImpl {
  33. hs_error_t (*AllocScratch)(const hs_database_t* db, hs_scratch_t** scratch);
  34. hs_error_t (*Scan)(const hs_database_t* db, const char* data,
  35. unsigned length, unsigned flags, hs_scratch_t* scratch,
  36. match_event_handler onEvent, void* userCtx);
  37. hs_error_t (*SerializeDatabase)(const hs_database_t* db, char** bytes, size_t* serialized_length);
  38. hs_error_t (*DeserializeDatabase)(const char* bytes, size_t length, hs_database_t** info);
  39. TImpl() : TImpl(DetectCurrentRuntime()) {}
  40. explicit TImpl(ERuntime runtime);
  41. };
  42. TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform);
  43. TDatabase CompileLiteral(const TStringBuf& literal, unsigned int flags, hs_platform_info_t* platform);
  44. TDatabase CompileMulti(
  45. const TVector<const char*>& regexs,
  46. const TVector<unsigned int>& flags,
  47. const TVector<unsigned int>& ids,
  48. hs_platform_info_t* platform,
  49. const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
  50. TDatabase CompileMultiLiteral(
  51. const TVector<const char*>& literals,
  52. const TVector<unsigned int>& flags,
  53. const TVector<unsigned int>& ids,
  54. const TVector<size_t>& lens,
  55. hs_platform_info_t* platform);
  56. // We need to parametrize Scan and Matches functions for testing purposes
  57. template<typename TCallback>
  58. void Scan(
  59. const TDatabase& db,
  60. const TScratch& scratch,
  61. const TStringBuf& text,
  62. TCallback& callback, // applied to index of matched regex
  63. const TImpl& impl
  64. ) {
  65. struct TCallbackWrapper {
  66. static int EventHandler(
  67. unsigned int id,
  68. unsigned long long from,
  69. unsigned long long to,
  70. unsigned int flags,
  71. void* ctx) {
  72. Y_UNUSED(flags);
  73. TCallback& callback2 = *reinterpret_cast<TCallback*>(ctx);
  74. if constexpr (std::is_same_v<int, std::invoke_result_t<TCallback, unsigned int, unsigned long long, unsigned long long>>) {
  75. return callback2(id, from, to);
  76. } else {
  77. callback2(id, from, to);
  78. return 0;
  79. }
  80. }
  81. };
  82. unsigned int flags = 0; // unused at present
  83. hs_error_t status = impl.Scan(
  84. db.Get(),
  85. text.begin(),
  86. text.size(),
  87. flags,
  88. scratch.Get(),
  89. &TCallbackWrapper::EventHandler,
  90. &callback);
  91. if (status != HS_SUCCESS && status != HS_SCAN_TERMINATED) {
  92. ythrow yexception() << "Failed to scan against text: " << text;
  93. }
  94. }
  95. bool Matches(
  96. const TDatabase& db,
  97. const TScratch& scratch,
  98. const TStringBuf& text,
  99. const TImpl& impl);
  100. }
  101. TDatabase Compile(const TStringBuf& regex, unsigned int flags);
  102. TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures);
  103. TDatabase CompileLiteral(const TStringBuf& literal, unsigned int flags);
  104. TDatabase CompileLiteral(const TStringBuf& literal, unsigned int flags, TCPUFeatures cpuFeatures);
  105. TDatabase CompileMulti(
  106. const TVector<const char*>& regexs,
  107. const TVector<unsigned int>& flags,
  108. const TVector<unsigned int>& ids,
  109. const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
  110. TDatabase CompileMulti(
  111. const TVector<const char*>& regexs,
  112. const TVector<unsigned int>& flags,
  113. const TVector<unsigned int>& ids,
  114. TCPUFeatures cpuFeatures,
  115. const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
  116. TDatabase CompileMultiLiteral(
  117. const TVector<const char*>& literals,
  118. const TVector<unsigned int>& flags,
  119. const TVector<unsigned int>& ids,
  120. const TVector<size_t>& lens);
  121. TDatabase CompileMultiLiteral(
  122. const TVector<const char*>& literals,
  123. const TVector<unsigned int>& flags,
  124. const TVector<unsigned int>& ids,
  125. const TVector<size_t>& lens,
  126. TCPUFeatures cpuFeatures);
  127. TScratch MakeScratch(const TDatabase& db);
  128. void GrowScratch(TScratch& scratch, const TDatabase& db);
  129. TScratch CloneScratch(const TScratch& scratch);
  130. template<typename TCallback>
  131. void Scan(
  132. const TDatabase& db,
  133. const TScratch& scratch,
  134. const TStringBuf& text,
  135. TCallback& callback // applied to index of matched regex
  136. ) {
  137. NPrivate::Scan<TCallback>(db, scratch, text, callback, *Singleton<NPrivate::TImpl>());
  138. }
  139. bool Matches(
  140. const TDatabase& db,
  141. const TScratch& scratch,
  142. const TStringBuf& text);
  143. TString Serialize(const TDatabase& db);
  144. TDatabase Deserialize(const TStringBuf& serialization);
  145. }