hyperscan.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. #include "hyperscan.h"
  2. #include <contrib/libs/hyperscan/runtime_core2/hs_common.h>
  3. #include <contrib/libs/hyperscan/runtime_core2/hs_runtime.h>
  4. #include <contrib/libs/hyperscan/runtime_corei7/hs_common.h>
  5. #include <contrib/libs/hyperscan/runtime_corei7/hs_runtime.h>
  6. #include <contrib/libs/hyperscan/runtime_avx2/hs_common.h>
  7. #include <contrib/libs/hyperscan/runtime_avx2/hs_runtime.h>
  8. #include <contrib/libs/hyperscan/runtime_avx512/hs_common.h>
  9. #include <contrib/libs/hyperscan/runtime_avx512/hs_runtime.h>
  10. #include <util/generic/singleton.h>
  11. #include <util/system/sanitizers.h>
  12. namespace NHyperscan {
  // Owning holder for a raw char buffer; the deleter releases it with free().
  13. using TSerializedDatabase = THolder<char, TDeleter<decltype(&free), &free>>;
  // Owning holder for a Hyperscan compile error; the deleter releases it with
  // hs_free_compile_error().
  14. using TCompileError = THolder<hs_compile_error_t, TDeleter<decltype(&hs_free_compile_error), &hs_free_compile_error>>;
  15. namespace NPrivate {
  16. ERuntime DetectCurrentRuntime() {
  17. // TODO: Remove MSanIsOn check upon DEVTOOLSSUPPORT-49258 resolution
  18. if (NX86::HaveAVX512F() && NX86::HaveAVX512BW() && !NSan::MSanIsOn()) {
  19. return ERuntime::AVX512;
  20. } else if (NX86::HaveAVX() && NX86::HaveAVX2()) {
  21. return ERuntime::AVX2;
  22. } else if (NX86::HaveSSE42() && NX86::HavePOPCNT()) {
  23. return ERuntime::Corei7;
  24. } else {
  25. return ERuntime::Core2;
  26. }
  27. }
  28. TCPUFeatures RuntimeCpuFeatures(ERuntime runtime) {
  29. switch (runtime) {
  30. default:
  31. Y_ASSERT(false);
  32. [[fallthrough]];
  33. case ERuntime::Core2:
  34. case ERuntime::Corei7:
  35. return 0;
  36. case ERuntime::AVX2:
  37. return CPU_FEATURES_AVX2;
  38. case ERuntime::AVX512:
  39. return CPU_FEATURES_AVX512;
  40. }
  41. }
  42. hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures) {
  43. hs_platform_info_t platformInfo{HS_TUNE_FAMILY_GENERIC, cpuFeatures, 0, 0};
  44. return platformInfo;
  45. }
  46. hs_platform_info_t MakeCurrentPlatformInfo() {
  47. return MakePlatformInfo(RuntimeCpuFeatures(DetectCurrentRuntime()));
  48. }
  49. TImpl::TImpl(ERuntime runtime) {
  50. switch (runtime) {
  51. default:
  52. Y_ASSERT(false);
  53. [[fallthrough]];
  54. case ERuntime::Core2:
  55. AllocScratch = core2_hs_alloc_scratch;
  56. Scan = core2_hs_scan;
  57. SerializeDatabase = core2_hs_serialize_database;
  58. DeserializeDatabase = core2_hs_deserialize_database;
  59. break;
  60. case ERuntime::Corei7:
  61. AllocScratch = corei7_hs_alloc_scratch;
  62. Scan = corei7_hs_scan;
  63. SerializeDatabase = corei7_hs_serialize_database;
  64. DeserializeDatabase = corei7_hs_deserialize_database;
  65. break;
  66. case ERuntime::AVX2:
  67. AllocScratch = avx2_hs_alloc_scratch;
  68. Scan = avx2_hs_scan;
  69. SerializeDatabase = avx2_hs_serialize_database;
  70. DeserializeDatabase = avx2_hs_deserialize_database;
  71. break;
  72. case ERuntime::AVX512:
  73. AllocScratch = avx512_hs_alloc_scratch;
  74. Scan = avx512_hs_scan;
  75. SerializeDatabase = avx512_hs_serialize_database;
  76. DeserializeDatabase = avx512_hs_deserialize_database;
  77. }
  78. }
  79. TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform) {
  80. hs_database_t* rawDb = nullptr;
  81. hs_compile_error_t* rawCompileErr = nullptr;
  82. hs_error_t status = hs_compile(
  83. regex.begin(),
  84. flags,
  85. HS_MODE_BLOCK,
  86. platform,
  87. &rawDb,
  88. &rawCompileErr);
  89. TDatabase db(rawDb);
  90. NHyperscan::TCompileError compileError(rawCompileErr);
  91. if (status != HS_SUCCESS) {
  92. ythrow TCompileException()
  93. << "Failed to compile regex: " << regex << ". "
  94. << "Error message (hyperscan): " << compileError->message;
  95. }
  96. return db;
  97. }
  98. TDatabase CompileMulti(
  99. const TVector<const char*>& regexs,
  100. const TVector<unsigned int>& flags,
  101. const TVector<unsigned int>& ids,
  102. hs_platform_info_t* platform,
  103. const TVector<const hs_expr_ext_t*>* extendedParameters) {
  104. unsigned int count = regexs.size();
  105. if (flags.size() != count) {
  106. ythrow yexception()
  107. << "Mismatch of sizes vectors passed to CompileMulti. "
  108. << "size(regexs) = " << regexs.size() << ". "
  109. << "size(flags) = " << flags.size() << ".";
  110. }
  111. if (ids.size() != count) {
  112. ythrow yexception()
  113. << "Mismatch of sizes vectors passed to CompileMulti. "
  114. << "size(regexs) = " << regexs.size() << ". "
  115. << "size(ids) = " << ids.size() << ".";
  116. }
  117. if (extendedParameters && extendedParameters->size() != count) {
  118. ythrow yexception()
  119. << "Mismatch of sizes vectors passed to CompileMulti. "
  120. << "size(regexs) = " << regexs.size() << ". "
  121. << "size(extendedParameters) = " << extendedParameters->size() << ".";
  122. }
  123. hs_database_t* rawDb = nullptr;
  124. hs_compile_error_t* rawCompileErr = nullptr;
  125. hs_error_t status = hs_compile_ext_multi(
  126. regexs.data(),
  127. flags.data(),
  128. ids.data(),
  129. extendedParameters ? extendedParameters->data() : nullptr,
  130. count,
  131. HS_MODE_BLOCK,
  132. platform,
  133. &rawDb,
  134. &rawCompileErr);
  135. TDatabase db(rawDb);
  136. NHyperscan::TCompileError compileError(rawCompileErr);
  137. if (status != HS_SUCCESS) {
  138. if (compileError->expression >= 0) {
  139. const char* regex = regexs[compileError->expression];
  140. ythrow TCompileException()
  141. << "Failed to compile regex: " << regex << ". "
  142. << "Error message (hyperscan): " << compileError->message;
  143. } else {
  144. ythrow TCompileException()
  145. << "Failed to compile multiple regexs. "
  146. << "Error message (hyperscan): " << compileError->message;
  147. }
  148. }
  149. return db;
  150. }
  151. bool Matches(
  152. const TDatabase& db,
  153. const TScratch& scratch,
  154. const TStringBuf& text,
  155. const TImpl& impl) {
  156. bool result = false;
  157. auto callback = [&](unsigned int /* id */, unsigned long long /* from */, unsigned long long /* to */) {
  158. result = true;
  159. return 1; // stop scan
  160. };
  161. Scan(
  162. db,
  163. scratch,
  164. text,
  165. callback,
  166. impl);
  167. return result;
  168. }
  169. } // namespace NPrivate
  170. TDatabase Compile(const TStringBuf& regex, unsigned int flags) {
  171. auto platformInfo = NPrivate::MakeCurrentPlatformInfo();
  172. return NPrivate::Compile(regex, flags, &platformInfo);
  173. }
  174. TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures) {
  175. auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures);
  176. return NPrivate::Compile(regex, flags, &platformInfo);
  177. }
  178. TDatabase CompileMulti(
  179. const TVector<const char*>& regexs,
  180. const TVector<unsigned int>& flags,
  181. const TVector<unsigned int>& ids,
  182. const TVector<const hs_expr_ext_t*>* extendedParameters)
  183. {
  184. auto platformInfo = NPrivate::MakeCurrentPlatformInfo();
  185. return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters);
  186. }
  187. TDatabase CompileMulti(
  188. const TVector<const char*>& regexs,
  189. const TVector<unsigned int>& flags,
  190. const TVector<unsigned int>& ids,
  191. TCPUFeatures cpuFeatures,
  192. const TVector<const hs_expr_ext_t*>* extendedParameters)
  193. {
  194. auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures);
  195. return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters);
  196. }
  197. TScratch MakeScratch(const TDatabase& db) {
  198. hs_scratch_t* rawScratch = nullptr;
  199. hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch);
  200. NHyperscan::TScratch scratch(rawScratch);
  201. if (status != HS_SUCCESS) {
  202. ythrow yexception() << "Failed to make scratch for hyperscan database";
  203. }
  204. return scratch;
  205. }
  206. void GrowScratch(TScratch& scratch, const TDatabase& db) {
  207. hs_scratch_t* rawScratch = scratch.Get();
  208. hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch);
  209. if (rawScratch != scratch.Get()) {
  210. Y_UNUSED(scratch.Release()); // freed by hs_alloc_scratch
  211. scratch.Reset(rawScratch);
  212. }
  213. if (status != HS_SUCCESS) {
  214. ythrow yexception() << "Failed to make grow scratch for hyperscan database";
  215. }
  216. }
  217. TScratch CloneScratch(const TScratch& scratch) {
  218. hs_scratch_t* rawScratch = nullptr;
  219. hs_error_t status = hs_clone_scratch(scratch.Get(), &rawScratch);
  220. TScratch scratchCopy(rawScratch);
  221. if (status != HS_SUCCESS) {
  222. ythrow yexception() << "Failed to clone scratch for hyperscan database";
  223. }
  224. return scratchCopy;
  225. }
  226. bool Matches(
  227. const TDatabase& db,
  228. const TScratch& scratch,
  229. const TStringBuf& text)
  230. {
  231. return NPrivate::Matches(db, scratch, text, *Singleton<NPrivate::TImpl>());
  232. }
  233. TString Serialize(const TDatabase& db) {
  234. char* databaseBytes = nullptr;
  235. size_t databaseLength;
  236. hs_error_t status = Singleton<NPrivate::TImpl>()->SerializeDatabase(
  237. db.Get(),
  238. &databaseBytes,
  239. &databaseLength);
  240. TSerializedDatabase serialization(databaseBytes);
  241. if (status != HS_SUCCESS) {
  242. ythrow yexception() << "Failed to serialize hyperscan database";
  243. }
  244. return TString(serialization.Get(), databaseLength);
  245. }
  246. TDatabase Deserialize(const TStringBuf& serialization) {
  247. hs_database_t* rawDb = nullptr;
  248. hs_error_t status = Singleton<NPrivate::TImpl>()->DeserializeDatabase(
  249. serialization.begin(),
  250. serialization.size(),
  251. &rawDb);
  252. TDatabase db(rawDb);
  253. if (status != HS_SUCCESS) {
  254. if (status == HS_DB_PLATFORM_ERROR) {
  255. ythrow yexception() << "Serialized Hyperscan database is incompatible with current CPU";
  256. } else if (status == HS_DB_VERSION_ERROR) {
  257. ythrow yexception() << "Need recreate Hyperscan database with new version Hyperscan";
  258. } else {
  259. ythrow yexception() << "Failed to deserialize hyperscan database (status = " << status << ")";
  260. }
  261. }
  262. return db;
  263. }
  264. }