hyperscan.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. #include "hyperscan.h"
  2. #include <contrib/libs/hyperscan/runtime_core2/hs_common.h>
  3. #include <contrib/libs/hyperscan/runtime_core2/hs_runtime.h>
  4. #include <contrib/libs/hyperscan/runtime_corei7/hs_common.h>
  5. #include <contrib/libs/hyperscan/runtime_corei7/hs_runtime.h>
  6. #include <contrib/libs/hyperscan/runtime_avx2/hs_common.h>
  7. #include <contrib/libs/hyperscan/runtime_avx2/hs_runtime.h>
  8. #include <contrib/libs/hyperscan/runtime_avx512/hs_common.h>
  9. #include <contrib/libs/hyperscan/runtime_avx512/hs_runtime.h>
  10. #include <util/generic/singleton.h>
  11. namespace NHyperscan {
  // Owning holder for a raw char buffer that is released with free().
  // Serialize() below wraps the buffer produced by SerializeDatabase in it.
  12. using TSerializedDatabase = THolder<char, TDeleter<decltype(&free), &free>>;
  // Owning holder for a Hyperscan compile error; released with
  // hs_free_compile_error() when the holder goes out of scope.
  13. using TCompileError = THolder<hs_compile_error_t, TDeleter<decltype(&hs_free_compile_error), &hs_free_compile_error>>;
  14. namespace NPrivate {
  15. ERuntime DetectCurrentRuntime() {
  16. if (NX86::HaveAVX512F() && NX86::HaveAVX512BW()) {
  17. return ERuntime::AVX512;
  18. } else if (NX86::HaveAVX() && NX86::HaveAVX2()) {
  19. return ERuntime::AVX2;
  20. } else if (NX86::HaveSSE42() && NX86::HavePOPCNT()) {
  21. return ERuntime::Corei7;
  22. } else {
  23. return ERuntime::Core2;
  24. }
  25. }
  26. TCPUFeatures RuntimeCpuFeatures(ERuntime runtime) {
  27. switch (runtime) {
  28. default:
  29. Y_ASSERT(false);
  30. [[fallthrough]];
  31. case ERuntime::Core2:
  32. case ERuntime::Corei7:
  33. return 0;
  34. case ERuntime::AVX2:
  35. return CPU_FEATURES_AVX2;
  36. case ERuntime::AVX512:
  37. return CPU_FEATURES_AVX512;
  38. }
  39. }
  40. hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures) {
  41. hs_platform_info_t platformInfo{HS_TUNE_FAMILY_GENERIC, cpuFeatures, 0, 0};
  42. return platformInfo;
  43. }
  44. hs_platform_info_t MakeCurrentPlatformInfo() {
  45. return MakePlatformInfo(RuntimeCpuFeatures(DetectCurrentRuntime()));
  46. }
  47. TImpl::TImpl(ERuntime runtime) {
  48. switch (runtime) {
  49. default:
  50. Y_ASSERT(false);
  51. [[fallthrough]];
  52. case ERuntime::Core2:
  53. AllocScratch = core2_hs_alloc_scratch;
  54. Scan = core2_hs_scan;
  55. SerializeDatabase = core2_hs_serialize_database;
  56. DeserializeDatabase = core2_hs_deserialize_database;
  57. break;
  58. case ERuntime::Corei7:
  59. AllocScratch = corei7_hs_alloc_scratch;
  60. Scan = corei7_hs_scan;
  61. SerializeDatabase = corei7_hs_serialize_database;
  62. DeserializeDatabase = corei7_hs_deserialize_database;
  63. break;
  64. case ERuntime::AVX2:
  65. AllocScratch = avx2_hs_alloc_scratch;
  66. Scan = avx2_hs_scan;
  67. SerializeDatabase = avx2_hs_serialize_database;
  68. DeserializeDatabase = avx2_hs_deserialize_database;
  69. break;
  70. case ERuntime::AVX512:
  71. AllocScratch = avx512_hs_alloc_scratch;
  72. Scan = avx512_hs_scan;
  73. SerializeDatabase = avx512_hs_serialize_database;
  74. DeserializeDatabase = avx512_hs_deserialize_database;
  75. }
  76. }
  77. TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform) {
  78. hs_database_t* rawDb = nullptr;
  79. hs_compile_error_t* rawCompileErr = nullptr;
  80. hs_error_t status = hs_compile(
  81. regex.begin(),
  82. flags,
  83. HS_MODE_BLOCK,
  84. platform,
  85. &rawDb,
  86. &rawCompileErr);
  87. TDatabase db(rawDb);
  88. NHyperscan::TCompileError compileError(rawCompileErr);
  89. if (status != HS_SUCCESS) {
  90. ythrow TCompileException()
  91. << "Failed to compile regex: " << regex << ". "
  92. << "Error message (hyperscan): " << compileError->message;
  93. }
  94. return db;
  95. }
  96. TDatabase CompileMulti(
  97. const TVector<const char*>& regexs,
  98. const TVector<unsigned int>& flags,
  99. const TVector<unsigned int>& ids,
  100. hs_platform_info_t* platform,
  101. const TVector<const hs_expr_ext_t*>* extendedParameters) {
  102. unsigned int count = regexs.size();
  103. if (flags.size() != count) {
  104. ythrow yexception()
  105. << "Mismatch of sizes vectors passed to CompileMulti. "
  106. << "size(regexs) = " << regexs.size() << ". "
  107. << "size(flags) = " << flags.size() << ".";
  108. }
  109. if (ids.size() != count) {
  110. ythrow yexception()
  111. << "Mismatch of sizes vectors passed to CompileMulti. "
  112. << "size(regexs) = " << regexs.size() << ". "
  113. << "size(ids) = " << ids.size() << ".";
  114. }
  115. if (extendedParameters && extendedParameters->size() != count) {
  116. ythrow yexception()
  117. << "Mismatch of sizes vectors passed to CompileMulti. "
  118. << "size(regexs) = " << regexs.size() << ". "
  119. << "size(extendedParameters) = " << extendedParameters->size() << ".";
  120. }
  121. hs_database_t* rawDb = nullptr;
  122. hs_compile_error_t* rawCompileErr = nullptr;
  123. hs_error_t status = hs_compile_ext_multi(
  124. regexs.data(),
  125. flags.data(),
  126. ids.data(),
  127. extendedParameters ? extendedParameters->data() : nullptr,
  128. count,
  129. HS_MODE_BLOCK,
  130. platform,
  131. &rawDb,
  132. &rawCompileErr);
  133. TDatabase db(rawDb);
  134. NHyperscan::TCompileError compileError(rawCompileErr);
  135. if (status != HS_SUCCESS) {
  136. if (compileError->expression >= 0) {
  137. const char* regex = regexs[compileError->expression];
  138. ythrow TCompileException()
  139. << "Failed to compile regex: " << regex << ". "
  140. << "Error message (hyperscan): " << compileError->message;
  141. } else {
  142. ythrow TCompileException()
  143. << "Failed to compile multiple regexs. "
  144. << "Error message (hyperscan): " << compileError->message;
  145. }
  146. }
  147. return db;
  148. }
  149. bool Matches(
  150. const TDatabase& db,
  151. const TScratch& scratch,
  152. const TStringBuf& text,
  153. const TImpl& impl) {
  154. bool result = false;
  155. auto callback = [&](unsigned int /* id */, unsigned long long /* from */, unsigned long long /* to */) {
  156. result = true;
  157. return 1; // stop scan
  158. };
  159. Scan(
  160. db,
  161. scratch,
  162. text,
  163. callback,
  164. impl);
  165. return result;
  166. }
  167. } // namespace NPrivate
  168. TDatabase Compile(const TStringBuf& regex, unsigned int flags) {
  169. auto platformInfo = NPrivate::MakeCurrentPlatformInfo();
  170. return NPrivate::Compile(regex, flags, &platformInfo);
  171. }
  172. TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures) {
  173. auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures);
  174. return NPrivate::Compile(regex, flags, &platformInfo);
  175. }
  176. TDatabase CompileMulti(
  177. const TVector<const char*>& regexs,
  178. const TVector<unsigned int>& flags,
  179. const TVector<unsigned int>& ids,
  180. const TVector<const hs_expr_ext_t*>* extendedParameters)
  181. {
  182. auto platformInfo = NPrivate::MakeCurrentPlatformInfo();
  183. return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters);
  184. }
  185. TDatabase CompileMulti(
  186. const TVector<const char*>& regexs,
  187. const TVector<unsigned int>& flags,
  188. const TVector<unsigned int>& ids,
  189. TCPUFeatures cpuFeatures,
  190. const TVector<const hs_expr_ext_t*>* extendedParameters)
  191. {
  192. auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures);
  193. return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters);
  194. }
  195. TScratch MakeScratch(const TDatabase& db) {
  196. hs_scratch_t* rawScratch = nullptr;
  197. hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch);
  198. NHyperscan::TScratch scratch(rawScratch);
  199. if (status != HS_SUCCESS) {
  200. ythrow yexception() << "Failed to make scratch for hyperscan database";
  201. }
  202. return scratch;
  203. }
  204. void GrowScratch(TScratch& scratch, const TDatabase& db) {
  205. hs_scratch_t* rawScratch = scratch.Get();
  206. hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch);
  207. if (rawScratch != scratch.Get()) {
  208. Y_UNUSED(scratch.Release()); // freed by hs_alloc_scratch
  209. scratch.Reset(rawScratch);
  210. }
  211. if (status != HS_SUCCESS) {
  212. ythrow yexception() << "Failed to make grow scratch for hyperscan database";
  213. }
  214. }
  215. TScratch CloneScratch(const TScratch& scratch) {
  216. hs_scratch_t* rawScratch = nullptr;
  217. hs_error_t status = hs_clone_scratch(scratch.Get(), &rawScratch);
  218. TScratch scratchCopy(rawScratch);
  219. if (status != HS_SUCCESS) {
  220. ythrow yexception() << "Failed to clone scratch for hyperscan database";
  221. }
  222. return scratchCopy;
  223. }
  224. bool Matches(
  225. const TDatabase& db,
  226. const TScratch& scratch,
  227. const TStringBuf& text)
  228. {
  229. return NPrivate::Matches(db, scratch, text, *Singleton<NPrivate::TImpl>());
  230. }
  231. TString Serialize(const TDatabase& db) {
  232. char* databaseBytes = nullptr;
  233. size_t databaseLength;
  234. hs_error_t status = Singleton<NPrivate::TImpl>()->SerializeDatabase(
  235. db.Get(),
  236. &databaseBytes,
  237. &databaseLength);
  238. TSerializedDatabase serialization(databaseBytes);
  239. if (status != HS_SUCCESS) {
  240. ythrow yexception() << "Failed to serialize hyperscan database";
  241. }
  242. return TString(serialization.Get(), databaseLength);
  243. }
  244. TDatabase Deserialize(const TStringBuf& serialization) {
  245. hs_database_t* rawDb = nullptr;
  246. hs_error_t status = Singleton<NPrivate::TImpl>()->DeserializeDatabase(
  247. serialization.begin(),
  248. serialization.size(),
  249. &rawDb);
  250. TDatabase db(rawDb);
  251. if (status != HS_SUCCESS) {
  252. if (status == HS_DB_PLATFORM_ERROR) {
  253. ythrow yexception() << "Serialized Hyperscan database is incompatible with current CPU";
  254. } else if (status == HS_DB_VERSION_ERROR) {
  255. ythrow yexception() << "Need recreate Hyperscan database with new version Hyperscan";
  256. } else {
  257. ythrow yexception() << "Failed to deserialize hyperscan database (status = " << status << ")";
  258. }
  259. }
  260. return db;
  261. }
  262. }