1 year ago · 764039b04f
--- a/src/sentry/grouping/strategies/configurations.py
+++ b/src/sentry/grouping/strategies/configurations.py
@@ -33,9 +33,9 @@ BASE_STRATEGY = create_strategy_configuration(
 
				         # strategy to disable itself.  Recursion is detected by the outer
			
 
				         # strategy.
			
 
				         "is_recursion": False,
			
 
				-        # This turns on the automatic message trimming by the message
			
 
				-        # strategy.
			
 
				-        "trim_message": False,
			
 
				+        # This turns on the automatic message trimming and parameter substitution
			
 
				+        # by the message strategy.
			
 
				+        "normalize_message": False,
			
 
				         # newstyle: enables the legacy function logic.  This is only used
			
 
				         # by the newstyle:2019-04-05 strategy.  Once this is no longer used
			
 
				         # this can go away entirely.
			
@@ -66,7 +66,7 @@ BASE_STRATEGY = create_strategy_configuration(
 
				         "use_package_fallback": False,
			
 
				         # Remove platform differences in native frames
			
 
				         "native_fuzzing": False,
			
 
				-        # Ignore exception types for native if they are platform specific error
			
 
				+        # Ignore exception types for native if they are platform-specific error
			
 
				         # codes. Normally SDKs are supposed to disable error-type grouping with
			
 
				         # the `synthetic` flag in the event, but a lot of error types we can
			
 
				         # also detect on the backend.
			
@@ -105,7 +105,7 @@ register_strategy_config(
 
				         * Some known weaknesses with regards to grouping of native frames
			
 
				     """,
			
 
				     initial_context={
			
 
				-        "trim_message": False,
			
 
				+        "normalize_message": False,
			
 
				     },
			
 
				     enhancements_base="legacy:2019-03-12",
			
 
				 )
			
@@ -135,7 +135,7 @@ register_strategy_config(
 
				         "javascript_fuzzing": True,
			
 
				         "contextline_platforms": ("javascript", "node", "python", "php", "ruby"),
			
 
				         "with_context_line_file_origin_bug": True,
			
 
				-        "trim_message": True,
			
 
				+        "normalize_message": True,
			
 
				         "with_exception_value_fallback": True,
			
 
				     },
			
 
				     enhancements_base="common:2019-03-23",
			
@@ -230,7 +230,7 @@ register_strategy_config(
 
				     hidden=True,
			
 
				     initial_context={
			
 
				         "legacy_function_logic": False,
			
 
				-        "trim_message": True,
			
 
				+        "normalize_message": True,
			
 
				         "with_exception_value_fallback": True,
			
 
				     },
			
 
				 )
			
--- a/src/sentry/grouping/strategies/message.py
+++ b/src/sentry/grouping/strategies/message.py
@@ -13,7 +13,9 @@ from sentry.grouping.strategies.base import (
 
				 from sentry.interfaces.message import Message
			
 
				 from sentry.utils import metrics
			
 
				 
			
 
				-_irrelevant_re = re.compile(
			
 
				+_parameterization_regex = re.compile(
			
 
				+    # The `(?x)` tells the regex compiler to ignore comments and unescaped whitespace,
			
 
				+    # so we can use newlines and indentation for better legibility.
			
 
				     r"""(?x)
			
 
				     (?P<email>
			
 
				         [a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*
			
@@ -96,32 +98,44 @@ _irrelevant_re = re.compile(
 
				         \b\d+\b
			
 
				     ) |
			
 
				     (?P<quoted_str>
			
 
				+        # The `=` here guarantees we'll only match the value half of key-value pairs,
			
 
				+        # rather than all quoted strings
			
 
				         ='([\w\s]+)'
			
 
				     )
			
 
				 """
			
 
				 )
			
 
				 
			
 
				 
			
 
				-def trim_message_for_grouping(string: str) -> str:
			
 
				-    """Replace values from a group's message to hide P.I.I. and improve grouping when no
			
 
				-    stacktrace available.
			
 
				+def normalize_message_for_grouping(message: str) -> str:
			
 
				+    """Replace values from a group's message with placeholders (to hide P.I.I. and
			
 
				+    improve grouping when no stacktrace is available) and trim to at most 2 lines.
			
 
				     """
			
 
				-    s = "\n".join(islice((x for x in string.splitlines() if x.strip()), 2)).strip()
			
 
				-    if s != string:
			
 
				-        s += "..."
			
 
				+    trimmed = "\n".join(
			
 
				+        # If there are multiple lines, grab the first two non-empty ones.
			
 
				+        islice(
			
 
				+            (x for x in message.splitlines() if x.strip()),
			
 
				+            2,
			
 
				+        )
			
 
				+    )
			
 
				+    if trimmed != message:
			
 
				+        trimmed += "..."
			
 
				 
			
 
				     def _handle_match(match: Match[str]) -> str:
			
 
				-        # e.g. hex, 0x40000015
			
 
				+        # Find the first (should be only) non-None match entry, and sub in the placeholder. For
			
 
				+        # example, given the groupdict item `('hex', '0x40000015')`, this returns '<hex>' as a
			
 
				+        # replacement for the original value in the string.
			
 
				         for key, value in match.groupdict().items():
			
 
				             if value is not None:
			
 
				-                # key can be one of the keys from _irrelevant_re, thus, not a large cardinality
			
 
				-                # tracking the key helps distinguish what kinds of replacements are happening
			
 
				+                # `key` can only be one of the keys from `_parameterization_regex`, thus, not a large
			
 
				+                # cardinality. Tracking the key helps distinguish what kinds of replacements are happening.
			
 
				                 metrics.incr("grouping.value_trimmed_from_message", tags={"key": key})
			
 
				-                # For quoted_str we want to preserver the = symbol
			
 
				+                # For `quoted_str` we want to preserve the `=` symbol, which we include in
			
 
				+                # the match in order not to replace random quoted strings in contexts other
			
 
				+                # than key-value pairs
			
 
				                 return f"=<{key}>" if key == "quoted_str" else f"<{key}>"
			
 
				         return ""
			
 
				 
			
 
				-    return _irrelevant_re.sub(_handle_match, s)
			
 
				+    return _parameterization_regex.sub(_handle_match, trimmed)
			
 
				 
			
 
				 
			
 
				 @strategy(ids=["message:v1"], interface=Message, score=0)
			
@@ -129,14 +143,14 @@ def trim_message_for_grouping(string: str) -> str:
 
				 def message_v1(
			
 
				     interface: Message, event: Event, context: GroupingContext, **meta: Any
			
 
				 ) -> ReturnedVariants:
			
 
				-    if context["trim_message"]:
			
 
				-        message_in = interface.message or interface.formatted or ""
			
 
				-        message_trimmed = trim_message_for_grouping(message_in)
			
 
				-        hint = "stripped common values" if message_in != message_trimmed else None
			
 
				+    if context["normalize_message"]:
			
 
				+        raw = interface.message or interface.formatted or ""
			
 
				+        normalized = normalize_message_for_grouping(raw)
			
 
				+        hint = "stripped event-specific values" if raw != normalized else None
			
 
				         return {
			
 
				             context["variant"]: GroupingComponent(
			
 
				                 id="message",
			
 
				-                values=[message_trimmed],
			
 
				+                values=[normalized],
			
 
				                 hint=hint,
			
 
				             )
			
 
				         }
			
--- a/src/sentry/grouping/strategies/newstyle.py
+++ b/src/sentry/grouping/strategies/newstyle.py
@@ -15,7 +15,7 @@ from sentry.grouping.strategies.base import (
 
				     strategy,
			
 
				 )
			
 
				 from sentry.grouping.strategies.hierarchical import get_stacktrace_hierarchy
			
 
				-from sentry.grouping.strategies.message import trim_message_for_grouping
			
 
				+from sentry.grouping.strategies.message import normalize_message_for_grouping
			
 
				 from sentry.grouping.strategies.utils import has_url_origin, remove_non_stacktrace_variants
			
 
				 from sentry.grouping.utils import hash_from_values
			
 
				 from sentry.interfaces.exception import Exception as ChainedException
			
@@ -642,12 +642,12 @@ def single_exception(
 
				                 id="value",
			
 
				             )
			
 
				 
			
 
				-            value_in = interface.value
			
 
				-            if value_in is not None:
			
 
				-                value_trimmed = trim_message_for_grouping(value_in)
			
 
				-                hint = "stripped common values" if value_in != value_trimmed else None
			
 
				-                if value_trimmed:
			
 
				-                    value_component.update(values=[value_trimmed], hint=hint)
			
 
				+            raw = interface.value
			
 
				+            if raw is not None:
			
 
				+                normalized = normalize_message_for_grouping(raw)
			
 
				+                hint = "stripped event-specific values" if raw != normalized else None
			
 
				+                if normalized:
			
 
				+                    value_component.update(values=[normalized], hint=hint)
			
 
				 
			
 
				             if stacktrace_component.contributes and value_component.contributes:
			
 
				                 value_component.update(
			
--- a/tests/sentry/grouping/snapshots/test_variants/test_event_hash_variant/mobile@2021_02_12/exception_groups_bad_duplicate_id.pysnap
+++ b/tests/sentry/grouping/snapshots/test_variants/test_event_hash_variant/mobile@2021_02_12/exception_groups_bad_duplicate_id.pysnap
@@ -16,7 +16,7 @@ app:
 
				         exception*
			
 
				           type*
			
 
				             "MyApp.Exception"
			
 
				-          value* (stripped common values)
			
 
				+          value* (stripped event-specific values)
			
 
				             "Test <int>"
			
 
				         exception*
			
 
				           type*
			
--- a/tests/sentry/grouping/snapshots/test_variants/test_event_hash_variant/mobile@2021_02_12/exception_groups_bad_missing_parent.pysnap
+++ b/tests/sentry/grouping/snapshots/test_variants/test_event_hash_variant/mobile@2021_02_12/exception_groups_bad_missing_parent.pysnap
@@ -10,5 +10,5 @@ app:
 
				       exception*
			
 
				         type*
			
 
				           "MyApp.Exception"
			
 
				-        value* (stripped common values)
			
 
				+        value* (stripped event-specific values)
			
 
				           "Test <int>"
			
--- a/tests/sentry/grouping/snapshots/test_variants/test_event_hash_variant/mobile@2021_02_12/exception_groups_bad_out_of_sequence.pysnap
+++ b/tests/sentry/grouping/snapshots/test_variants/test_event_hash_variant/mobile@2021_02_12/exception_groups_bad_out_of_sequence.pysnap
@@ -11,7 +11,7 @@ app:
 
				         exception*
			
 
				           type*
			
 
				             "MyApp.Exception"
			
 
				-          value* (stripped common values)
			
 
				+          value* (stripped event-specific values)
			
 
				             "Test <int>"
			
 
				         exception*
			
 
				           type*
			
--- a/tests/sentry/grouping/snapshots/test_variants/test_event_hash_variant/mobile@2021_02_12/exception_groups_one_exception.pysnap
+++ b/tests/sentry/grouping/snapshots/test_variants/test_event_hash_variant/mobile@2021_02_12/exception_groups_one_exception.pysnap
@@ -10,5 +10,5 @@ app:
 
				       exception*
			
 
				         type*
			
 
				           "MyApp.Exception"
			
 
				-        value* (stripped common values)
			
 
				+        value* (stripped event-specific values)
			
 
				           "Test <int>"
			
--- a/tests/sentry/grouping/snapshots/test_variants/test_event_hash_variant/mobile@2021_02_12/exception_groups_one_type_under_nested_groups.pysnap
+++ b/tests/sentry/grouping/snapshots/test_variants/test_event_hash_variant/mobile@2021_02_12/exception_groups_one_type_under_nested_groups.pysnap
@@ -10,5 +10,5 @@ app:
 
				       exception*
			
 
				         type*
			
 
				           "MyApp.Exception"
			
 
				-        value* (stripped common values)
			
 
				+        value* (stripped event-specific values)
			
 
				           "Test <int>"
			
--- a/tests/sentry/grouping/snapshots/test_variants/test_event_hash_variant/mobile@2021_02_12/exception_groups_one_type_with_different_values.pysnap
+++ b/tests/sentry/grouping/snapshots/test_variants/test_event_hash_variant/mobile@2021_02_12/exception_groups_one_type_with_different_values.pysnap
@@ -11,7 +11,7 @@ app:
 
				         exception*
			
 
				           type*
			
 
				             "MyApp.Exception"
			
 
				-          value* (stripped common values)
			
 
				+          value* (stripped event-specific values)
			
 
				             "Test <int>"
			
 
				         exception*
			
 
				           type*
			
--- a/tests/sentry/grouping/snapshots/test_variants/test_event_hash_variant/mobile@2021_02_12/exception_groups_one_type_with_similar_values.pysnap
+++ b/tests/sentry/grouping/snapshots/test_variants/test_event_hash_variant/mobile@2021_02_12/exception_groups_one_type_with_similar_values.pysnap
@@ -10,5 +10,5 @@ app:
 
				       exception*
			
 
				         type*
			
 
				           "MyApp.Exception"
			
 
				-        value* (stripped common values)
			
 
				+        value* (stripped event-specific values)
			
 
				           "Test <int>"