|
@@ -29,10 +29,10 @@
|
|
|
/** \file
|
|
|
* \brief Compiler front-end interface.
|
|
|
*/
|
|
|
-#include "allocator.h"
|
|
|
+#include "allocator.h"
|
|
|
#include "asserts.h"
|
|
|
#include "compiler.h"
|
|
|
-#include "crc32.h"
|
|
|
+#include "crc32.h"
|
|
|
#include "database.h"
|
|
|
#include "grey.h"
|
|
|
#include "hs_internal.h"
|
|
@@ -58,7 +58,7 @@
|
|
|
#include "rose/rose_build.h"
|
|
|
#include "rose/rose_internal.h"
|
|
|
#include "som/slot_manager_dump.h"
|
|
|
-#include "util/bytecode_ptr.h"
|
|
|
+#include "util/bytecode_ptr.h"
|
|
|
#include "util/compile_error.h"
|
|
|
#include "util/target_info.h"
|
|
|
#include "util/verify_types.h"
|
|
@@ -80,9 +80,9 @@ static
|
|
|
void validateExt(const hs_expr_ext &ext) {
|
|
|
static const unsigned long long ALL_EXT_FLAGS = HS_EXT_FLAG_MIN_OFFSET |
|
|
|
HS_EXT_FLAG_MAX_OFFSET |
|
|
|
- HS_EXT_FLAG_MIN_LENGTH |
|
|
|
- HS_EXT_FLAG_EDIT_DISTANCE |
|
|
|
- HS_EXT_FLAG_HAMMING_DISTANCE;
|
|
|
+ HS_EXT_FLAG_MIN_LENGTH |
|
|
|
+ HS_EXT_FLAG_EDIT_DISTANCE |
|
|
|
+ HS_EXT_FLAG_HAMMING_DISTANCE;
|
|
|
if (ext.flags & ~ALL_EXT_FLAGS) {
|
|
|
throw CompileError("Invalid hs_expr_ext flag set.");
|
|
|
}
|
|
@@ -100,13 +100,13 @@ void validateExt(const hs_expr_ext &ext) {
|
|
|
throw CompileError("In hs_expr_ext, min_length must be less than or "
|
|
|
"equal to max_offset.");
|
|
|
}
|
|
|
-
|
|
|
- if ((ext.flags & HS_EXT_FLAG_EDIT_DISTANCE) &&
|
|
|
- (ext.flags & HS_EXT_FLAG_HAMMING_DISTANCE)) {
|
|
|
- throw CompileError("In hs_expr_ext, cannot have both edit distance and "
|
|
|
- "Hamming distance.");
|
|
|
- }
|
|
|
-
|
|
|
+
|
|
|
+ if ((ext.flags & HS_EXT_FLAG_EDIT_DISTANCE) &&
|
|
|
+ (ext.flags & HS_EXT_FLAG_HAMMING_DISTANCE)) {
|
|
|
+ throw CompileError("In hs_expr_ext, cannot have both edit distance and "
|
|
|
+ "Hamming distance.");
|
|
|
+ }
|
|
|
+
|
|
|
}
|
|
|
|
|
|
void ParsedLitExpression::parseLiteral(const char *expression, size_t len,
|
|
@@ -150,10 +150,10 @@ ParsedLitExpression::ParsedLitExpression(unsigned index_in,
|
|
|
}
|
|
|
|
|
|
ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
|
|
|
- unsigned flags, ReportID report,
|
|
|
+ unsigned flags, ReportID report,
|
|
|
const hs_expr_ext *ext)
|
|
|
- : expr(index_in, flags & HS_FLAG_ALLOWEMPTY, flags & HS_FLAG_SINGLEMATCH,
|
|
|
- false, flags & HS_FLAG_PREFILTER, SOM_NONE, report, 0, MAX_OFFSET,
|
|
|
+ : expr(index_in, flags & HS_FLAG_ALLOWEMPTY, flags & HS_FLAG_SINGLEMATCH,
|
|
|
+ false, flags & HS_FLAG_PREFILTER, SOM_NONE, report, 0, MAX_OFFSET,
|
|
|
0, 0, 0, flags & HS_FLAG_QUIET) {
|
|
|
// We disallow SOM + Quiet.
|
|
|
if ((flags & HS_FLAG_QUIET) && (flags & HS_FLAG_SOM_LEFTMOST)) {
|
|
@@ -165,7 +165,7 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
|
|
|
|
|
|
component = parse(expression, mode);
|
|
|
|
|
|
- expr.utf8 = mode.utf8; /* utf8 may be set by parse() */
|
|
|
+ expr.utf8 = mode.utf8; /* utf8 may be set by parse() */
|
|
|
|
|
|
const size_t len = strlen(expression);
|
|
|
if (expr.utf8 && !isValidUtf8(expression, len)) {
|
|
@@ -196,7 +196,7 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
|
|
|
|
|
|
// Set SOM type.
|
|
|
if (flags & HS_FLAG_SOM_LEFTMOST) {
|
|
|
- expr.som = SOM_LEFT;
|
|
|
+ expr.som = SOM_LEFT;
|
|
|
}
|
|
|
|
|
|
// Set extended parameters, if we have them.
|
|
@@ -205,32 +205,32 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
|
|
|
validateExt(*ext);
|
|
|
|
|
|
if (ext->flags & HS_EXT_FLAG_MIN_OFFSET) {
|
|
|
- expr.min_offset = ext->min_offset;
|
|
|
+ expr.min_offset = ext->min_offset;
|
|
|
}
|
|
|
if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) {
|
|
|
- expr.max_offset = ext->max_offset;
|
|
|
+ expr.max_offset = ext->max_offset;
|
|
|
}
|
|
|
if (ext->flags & HS_EXT_FLAG_MIN_LENGTH) {
|
|
|
- expr.min_length = ext->min_length;
|
|
|
+ expr.min_length = ext->min_length;
|
|
|
+ }
|
|
|
+ if (ext->flags & HS_EXT_FLAG_EDIT_DISTANCE) {
|
|
|
+ expr.edit_distance = ext->edit_distance;
|
|
|
+ }
|
|
|
+ if (ext->flags & HS_EXT_FLAG_HAMMING_DISTANCE) {
|
|
|
+ expr.hamm_distance = ext->hamming_distance;
|
|
|
}
|
|
|
- if (ext->flags & HS_EXT_FLAG_EDIT_DISTANCE) {
|
|
|
- expr.edit_distance = ext->edit_distance;
|
|
|
- }
|
|
|
- if (ext->flags & HS_EXT_FLAG_HAMMING_DISTANCE) {
|
|
|
- expr.hamm_distance = ext->hamming_distance;
|
|
|
- }
|
|
|
}
|
|
|
|
|
|
// These are validated in validateExt, so an error will already have been
|
|
|
// thrown if these conditions don't hold.
|
|
|
- assert(expr.max_offset >= expr.min_offset);
|
|
|
- assert(expr.max_offset >= expr.min_length);
|
|
|
+ assert(expr.max_offset >= expr.min_offset);
|
|
|
+ assert(expr.max_offset >= expr.min_length);
|
|
|
|
|
|
// Since prefiltering and SOM aren't supported together, we must squash any
|
|
|
// min_length constraint as well.
|
|
|
- if (flags & HS_FLAG_PREFILTER && expr.min_length) {
|
|
|
+ if (flags & HS_FLAG_PREFILTER && expr.min_length) {
|
|
|
DEBUG_PRINTF("prefiltering mode: squashing min_length constraint\n");
|
|
|
- expr.min_length = 0;
|
|
|
+ expr.min_length = 0;
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -239,25 +239,25 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
|
|
|
* \brief Dumps the parse tree to screen in debug mode and to disk in dump
|
|
|
* mode.
|
|
|
*/
|
|
|
-void dumpExpression(UNUSED const ParsedExpression &pe,
|
|
|
+void dumpExpression(UNUSED const ParsedExpression &pe,
|
|
|
UNUSED const char *stage, UNUSED const Grey &grey) {
|
|
|
#if defined(DEBUG)
|
|
|
- DEBUG_PRINTF("===== Rule ID: %u (expression index: %u) =====\n",
|
|
|
- pe.expr.report, pe.expr.index);
|
|
|
+ DEBUG_PRINTF("===== Rule ID: %u (expression index: %u) =====\n",
|
|
|
+ pe.expr.report, pe.expr.index);
|
|
|
ostringstream debug_tree;
|
|
|
- dumpTree(debug_tree, pe.component.get());
|
|
|
+ dumpTree(debug_tree, pe.component.get());
|
|
|
printf("%s\n", debug_tree.str().c_str());
|
|
|
#endif // DEBUG
|
|
|
|
|
|
#if defined(DUMP_SUPPORT)
|
|
|
if (grey.dumpFlags & Grey::DUMP_PARSE) {
|
|
|
stringstream ss;
|
|
|
- ss << grey.dumpPath << "Expr_" << pe.expr.index << "_componenttree_"
|
|
|
+ ss << grey.dumpPath << "Expr_" << pe.expr.index << "_componenttree_"
|
|
|
<< stage << ".txt";
|
|
|
ofstream out(ss.str().c_str());
|
|
|
- out << "Component Tree for " << pe.expr.report << endl;
|
|
|
- dumpTree(out, pe.component.get());
|
|
|
- if (pe.expr.utf8) {
|
|
|
+ out << "Component Tree for " << pe.expr.report << endl;
|
|
|
+ dumpTree(out, pe.component.get());
|
|
|
+ if (pe.expr.utf8) {
|
|
|
out << "UTF8 mode" << endl;
|
|
|
}
|
|
|
}
|
|
@@ -267,13 +267,13 @@ void dumpExpression(UNUSED const ParsedExpression &pe,
|
|
|
|
|
|
/** \brief Run Component tree optimisations on \a expr. */
|
|
|
static
|
|
|
-void optimise(ParsedExpression &pe) {
|
|
|
- if (pe.expr.min_length || pe.expr.som) {
|
|
|
+void optimise(ParsedExpression &pe) {
|
|
|
+ if (pe.expr.min_length || pe.expr.som) {
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
DEBUG_PRINTF("optimising\n");
|
|
|
- pe.component->optimise(true /* root is connected to sds */);
|
|
|
+ pe.component->optimise(true /* root is connected to sds */);
|
|
|
}
|
|
|
|
|
|
void addExpression(NG &ng, unsigned index, const char *expression,
|
|
@@ -329,34 +329,34 @@ void addExpression(NG &ng, unsigned index, const char *expression,
|
|
|
|
|
|
// Do per-expression processing: errors here will result in an exception
|
|
|
// being thrown up to our caller
|
|
|
- ParsedExpression pe(index, expression, flags, id, ext);
|
|
|
- dumpExpression(pe, "orig", cc.grey);
|
|
|
+ ParsedExpression pe(index, expression, flags, id, ext);
|
|
|
+ dumpExpression(pe, "orig", cc.grey);
|
|
|
|
|
|
// Apply prefiltering transformations if desired.
|
|
|
- if (pe.expr.prefilter) {
|
|
|
- prefilterTree(pe.component, ParseMode(flags));
|
|
|
- dumpExpression(pe, "prefiltered", cc.grey);
|
|
|
+ if (pe.expr.prefilter) {
|
|
|
+ prefilterTree(pe.component, ParseMode(flags));
|
|
|
+ dumpExpression(pe, "prefiltered", cc.grey);
|
|
|
}
|
|
|
|
|
|
// Expressions containing zero-width assertions and other extended pcre
|
|
|
// types aren't supported yet. This call will throw a ParseError exception
|
|
|
// if the component tree contains such a construct.
|
|
|
- checkUnsupported(*pe.component);
|
|
|
+ checkUnsupported(*pe.component);
|
|
|
|
|
|
- pe.component->checkEmbeddedStartAnchor(true);
|
|
|
- pe.component->checkEmbeddedEndAnchor(true);
|
|
|
+ pe.component->checkEmbeddedStartAnchor(true);
|
|
|
+ pe.component->checkEmbeddedEndAnchor(true);
|
|
|
|
|
|
if (cc.grey.optimiseComponentTree) {
|
|
|
- optimise(pe);
|
|
|
- dumpExpression(pe, "opt", cc.grey);
|
|
|
+ optimise(pe);
|
|
|
+ dumpExpression(pe, "opt", cc.grey);
|
|
|
}
|
|
|
|
|
|
DEBUG_PRINTF("component=%p, nfaId=%u, reportId=%u\n",
|
|
|
- pe.component.get(), pe.expr.index, pe.expr.report);
|
|
|
+ pe.component.get(), pe.expr.index, pe.expr.report);
|
|
|
|
|
|
// You can only use the SOM flags if you've also specified an SOM
|
|
|
// precision mode.
|
|
|
- if (pe.expr.som != SOM_NONE && cc.streaming && !ng.ssm.somPrecision()) {
|
|
|
+ if (pe.expr.som != SOM_NONE && cc.streaming && !ng.ssm.somPrecision()) {
|
|
|
throw CompileError("To use a SOM expression flag in streaming mode, "
|
|
|
"an SOM precision mode (e.g. "
|
|
|
"HS_MODE_SOM_HORIZON_LARGE) must be specified.");
|
|
@@ -364,25 +364,25 @@ void addExpression(NG &ng, unsigned index, const char *expression,
|
|
|
|
|
|
// If this expression is a literal, we can feed it directly to Rose rather
|
|
|
// than building the NFA graph.
|
|
|
- if (shortcutLiteral(ng, pe)) {
|
|
|
+ if (shortcutLiteral(ng, pe)) {
|
|
|
DEBUG_PRINTF("took literal short cut\n");
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
- auto built_expr = buildGraph(ng.rm, cc, pe);
|
|
|
- if (!built_expr.g) {
|
|
|
+ auto built_expr = buildGraph(ng.rm, cc, pe);
|
|
|
+ if (!built_expr.g) {
|
|
|
DEBUG_PRINTF("NFA build failed on ID %u, but no exception was "
|
|
|
- "thrown.\n", pe.expr.report);
|
|
|
+ "thrown.\n", pe.expr.report);
|
|
|
throw CompileError("Internal error.");
|
|
|
}
|
|
|
|
|
|
- if (!pe.expr.allow_vacuous && matches_everywhere(*built_expr.g)) {
|
|
|
+ if (!pe.expr.allow_vacuous && matches_everywhere(*built_expr.g)) {
|
|
|
throw CompileError("Pattern matches empty buffer; use "
|
|
|
"HS_FLAG_ALLOWEMPTY to enable support.");
|
|
|
}
|
|
|
|
|
|
- if (!ng.addGraph(built_expr.expr, std::move(built_expr.g))) {
|
|
|
- DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", pe.expr.report);
|
|
|
+ if (!ng.addGraph(built_expr.expr, std::move(built_expr.g))) {
|
|
|
+ DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", pe.expr.report);
|
|
|
throw CompileError("Error compiling expression.");
|
|
|
}
|
|
|
}
|
|
@@ -430,7 +430,7 @@ void addLitExpression(NG &ng, unsigned index, const char *expression,
|
|
|
}
|
|
|
|
|
|
static
|
|
|
-bytecode_ptr<RoseEngine> generateRoseEngine(NG &ng) {
|
|
|
+bytecode_ptr<RoseEngine> generateRoseEngine(NG &ng) {
|
|
|
const u32 minWidth =
|
|
|
ng.minWidth.is_finite() ? verify_u32(ng.minWidth) : ROSE_BOUND_INF;
|
|
|
auto rose = ng.rose->buildRose(minWidth);
|
|
@@ -455,54 +455,54 @@ platform_t target_to_platform(const target_t &target_info) {
|
|
|
if (!target_info.has_avx2()) {
|
|
|
p |= HS_PLATFORM_NOAVX2;
|
|
|
}
|
|
|
- if (!target_info.has_avx512()) {
|
|
|
- p |= HS_PLATFORM_NOAVX512;
|
|
|
- }
|
|
|
+ if (!target_info.has_avx512()) {
|
|
|
+ p |= HS_PLATFORM_NOAVX512;
|
|
|
+ }
|
|
|
if (!target_info.has_avx512vbmi()) {
|
|
|
p |= HS_PLATFORM_NOAVX512VBMI;
|
|
|
}
|
|
|
return p;
|
|
|
}
|
|
|
|
|
|
-/** \brief Encapsulate the given bytecode (RoseEngine) in a newly-allocated
|
|
|
- * \ref hs_database, ensuring that it is padded correctly to give cacheline
|
|
|
- * alignment. */
|
|
|
-static
|
|
|
-hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) {
|
|
|
- size_t db_len = sizeof(struct hs_database) + len;
|
|
|
- DEBUG_PRINTF("db size %zu\n", db_len);
|
|
|
- DEBUG_PRINTF("db platform %llx\n", platform);
|
|
|
-
|
|
|
- struct hs_database *db = (struct hs_database *)hs_database_alloc(db_len);
|
|
|
- if (hs_check_alloc(db) != HS_SUCCESS) {
|
|
|
- hs_database_free(db);
|
|
|
- return nullptr;
|
|
|
- }
|
|
|
-
|
|
|
- // So that none of our database is uninitialized
|
|
|
- memset(db, 0, db_len);
|
|
|
-
|
|
|
- // we need to align things manually
|
|
|
- size_t shift = (uintptr_t)db->bytes & 0x3f;
|
|
|
- DEBUG_PRINTF("shift is %zu\n", shift);
|
|
|
-
|
|
|
- db->bytecode = offsetof(struct hs_database, bytes) - shift;
|
|
|
- char *bytecode = (char *)db + db->bytecode;
|
|
|
- assert(ISALIGNED_CL(bytecode));
|
|
|
-
|
|
|
- db->magic = HS_DB_MAGIC;
|
|
|
- db->version = HS_DB_VERSION;
|
|
|
- db->length = len;
|
|
|
- db->platform = platform;
|
|
|
-
|
|
|
- // Copy bytecode
|
|
|
- memcpy(bytecode, in_bytecode, len);
|
|
|
-
|
|
|
- db->crc32 = Crc32c_ComputeBuf(0, bytecode, db->length);
|
|
|
- return db;
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
+/** \brief Encapsulate the given bytecode (RoseEngine) in a newly-allocated
|
|
|
+ * \ref hs_database, ensuring that it is padded correctly to give cacheline
|
|
|
+ * alignment. */
|
|
|
+static
|
|
|
+hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) {
|
|
|
+ size_t db_len = sizeof(struct hs_database) + len;
|
|
|
+ DEBUG_PRINTF("db size %zu\n", db_len);
|
|
|
+ DEBUG_PRINTF("db platform %llx\n", platform);
|
|
|
+
|
|
|
+ struct hs_database *db = (struct hs_database *)hs_database_alloc(db_len);
|
|
|
+ if (hs_check_alloc(db) != HS_SUCCESS) {
|
|
|
+ hs_database_free(db);
|
|
|
+ return nullptr;
|
|
|
+ }
|
|
|
+
|
|
|
+ // So that none of our database is uninitialized
|
|
|
+ memset(db, 0, db_len);
|
|
|
+
|
|
|
+ // we need to align things manually
|
|
|
+ size_t shift = (uintptr_t)db->bytes & 0x3f;
|
|
|
+ DEBUG_PRINTF("shift is %zu\n", shift);
|
|
|
+
|
|
|
+ db->bytecode = offsetof(struct hs_database, bytes) - shift;
|
|
|
+ char *bytecode = (char *)db + db->bytecode;
|
|
|
+ assert(ISALIGNED_CL(bytecode));
|
|
|
+
|
|
|
+ db->magic = HS_DB_MAGIC;
|
|
|
+ db->version = HS_DB_VERSION;
|
|
|
+ db->length = len;
|
|
|
+ db->platform = platform;
|
|
|
+
|
|
|
+ // Copy bytecode
|
|
|
+ memcpy(bytecode, in_bytecode, len);
|
|
|
+
|
|
|
+ db->crc32 = Crc32c_ComputeBuf(0, bytecode, db->length);
|
|
|
+ return db;
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag) {
|
|
|
assert(length);
|
|
|
|
|
@@ -513,7 +513,7 @@ struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag) {
|
|
|
if (!rose) {
|
|
|
throw CompileError("Unable to generate bytecode.");
|
|
|
}
|
|
|
- *length = rose.size();
|
|
|
+ *length = rose.size();
|
|
|
if (!*length) {
|
|
|
DEBUG_PRINTF("RoseEngine has zero length\n");
|
|
|
assert(0);
|
|
@@ -594,42 +594,42 @@ bool isSupported(const Component &c) {
|
|
|
}
|
|
|
#endif
|
|
|
|
|
|
-BuiltExpression buildGraph(ReportManager &rm, const CompileContext &cc,
|
|
|
- const ParsedExpression &pe) {
|
|
|
- assert(isSupported(*pe.component));
|
|
|
+BuiltExpression buildGraph(ReportManager &rm, const CompileContext &cc,
|
|
|
+ const ParsedExpression &pe) {
|
|
|
+ assert(isSupported(*pe.component));
|
|
|
|
|
|
- const auto builder = makeNFABuilder(rm, cc, pe);
|
|
|
+ const auto builder = makeNFABuilder(rm, cc, pe);
|
|
|
assert(builder);
|
|
|
|
|
|
// Set up START and ACCEPT states; retrieve the special states
|
|
|
- const auto bs = makeGlushkovBuildState(*builder, pe.expr.prefilter);
|
|
|
+ const auto bs = makeGlushkovBuildState(*builder, pe.expr.prefilter);
|
|
|
|
|
|
// Map position IDs to characters/components
|
|
|
- pe.component->notePositions(*bs);
|
|
|
+ pe.component->notePositions(*bs);
|
|
|
|
|
|
// Wire the start dotstar state to the firsts
|
|
|
- connectInitialStates(*bs, pe);
|
|
|
+ connectInitialStates(*bs, pe);
|
|
|
|
|
|
DEBUG_PRINTF("wire up body of expr\n");
|
|
|
// Build the rest of the FOLLOW set
|
|
|
vector<PositionInfo> initials = {builder->getStartDotStar(),
|
|
|
builder->getStart()};
|
|
|
- pe.component->buildFollowSet(*bs, initials);
|
|
|
+ pe.component->buildFollowSet(*bs, initials);
|
|
|
|
|
|
// Wire the lasts to the accept state
|
|
|
- connectFinalStates(*bs, pe);
|
|
|
+ connectFinalStates(*bs, pe);
|
|
|
|
|
|
// Create our edges
|
|
|
bs->buildEdges();
|
|
|
|
|
|
- BuiltExpression built_expr = builder->getGraph();
|
|
|
- assert(built_expr.g);
|
|
|
+ BuiltExpression built_expr = builder->getGraph();
|
|
|
+ assert(built_expr.g);
|
|
|
|
|
|
- dumpDotWrapper(*built_expr.g, built_expr.expr, "00_before_asserts",
|
|
|
- cc.grey);
|
|
|
- removeAssertVertices(rm, *built_expr.g, built_expr.expr);
|
|
|
+ dumpDotWrapper(*built_expr.g, built_expr.expr, "00_before_asserts",
|
|
|
+ cc.grey);
|
|
|
+ removeAssertVertices(rm, *built_expr.g, built_expr.expr);
|
|
|
|
|
|
- return built_expr;
|
|
|
+ return built_expr;
|
|
|
}
|
|
|
|
|
|
} // namespace ue2
|