Browse Source

ref(onboarding): Add function to record first event per project with min stack trace -(#42208)

Priscila Oliveira 2 years ago
parent
commit
ce841204ef

+ 1 - 1
migrations_lockfile.txt

@@ -6,5 +6,5 @@ To resolve this, rebase against latest master and regenerate your migration. Thi
 will then be regenerated, and you should be able to merge without conflicts.
 
 nodestore: 0002_nodestore_no_dictfield
-sentry: 0346_add_flags_field_to_team_model
+sentry: 0347_add_project_has_minified_stack_trace_flag
 social_auth: 0001_initial

+ 26 - 1
src/sentry/event_manager.py

@@ -25,6 +25,7 @@ from typing import (
     cast,
 )
 
+import pytz
 import sentry_sdk
 from django.conf import settings
 from django.core.cache import cache
@@ -116,7 +117,12 @@ from sentry.quotas.base import index_data_category
 from sentry.ratelimits.sliding_windows import Quota, RedisSlidingWindowRateLimiter, RequestedQuota
 from sentry.reprocessing2 import is_reprocessed_event, save_unprocessed_event
 from sentry.shared_integrations.exceptions import ApiError
-from sentry.signals import first_event_received, first_transaction_received, issue_unresolved
+from sentry.signals import (
+    first_event_received,
+    first_event_with_minified_stack_trace_received,
+    first_transaction_received,
+    issue_unresolved,
+)
 from sentry.tasks.commits import fetch_commits
 from sentry.tasks.integrations import kick_off_status_syncs
 from sentry.tasks.process_buffer import buffer_incr
@@ -127,6 +133,7 @@ from sentry.utils import json, metrics, redis
 from sentry.utils.cache import cache_key_for_event
 from sentry.utils.canonical import CanonicalKeyDict
 from sentry.utils.dates import to_datetime, to_timestamp
+from sentry.utils.event import has_event_minified_stack_trace
 from sentry.utils.metrics import MutableTags
 from sentry.utils.outcomes import Outcome, track_outcome
 from sentry.utils.performance_issues.performance_detection import (
@@ -136,6 +143,12 @@ from sentry.utils.performance_issues.performance_detection import (
 )
 from sentry.utils.safe import get_path, safe_execute, setdefault_path, trim
 
+# Used to determine if we should or not record an analytic data
+# for a first event of a project with a minified stack trace
+START_DATE_TRACKING_FIRST_EVENT_WITH_MINIFIED_STACK_TRACE_PER_PROJ = datetime(
+    2022, 12, 14, tzinfo=pytz.UTC
+)
+
 if TYPE_CHECKING:
     from sentry.eventstore.models import Event
 
@@ -599,6 +612,18 @@ class EventManager:
                     project=project, event=job["event"], sender=Project
                 )
 
+            if (
+                has_event_minified_stack_trace(job["event"])
+                and not project.flags.has_minified_stack_trace
+                # We only want to record events from projects created after 2022-12-14,
+                # otherwise amplitude would receive a large amount of data in a short period of time
+                and project.date_added
+                > START_DATE_TRACKING_FIRST_EVENT_WITH_MINIFIED_STACK_TRACE_PER_PROJ
+            ):
+                first_event_with_minified_stack_trace_received.send_robust(
+                    project=project, event=job["event"], sender=Project
+                )
+
         if is_reprocessed:
             safe_execute(
                 reprocessing2.buffered_delete_old_primary_hash,

+ 60 - 0
src/sentry/migrations/0347_add_project_has_minified_stack_trace_flag.py

@@ -0,0 +1,60 @@
+# Generated by Django 2.2.28 on 2022-12-19 10:34
+
+from django.db import migrations
+
+import bitfield.models
+from sentry.new_migrations.migrations import CheckedMigration
+
+
+class Migration(CheckedMigration):
+    # This flag is used to mark that a migration shouldn't be automatically run in production. For
+    # the most part, this should only be used for operations where it's safe to run the migration
+    # after your code has deployed. So this should not be used for most operations that alter the
+    # schema of a table.
+    # Here are some things that make sense to mark as dangerous:
+    # - Large data migrations. Typically we want these to be run manually by ops so that they can
+    #   be monitored and not block the deploy for a long period of time while they run.
+    # - Adding indexes to large tables. Since this can take a long time, we'd generally prefer to
+    #   have ops run this and not block the deploy. Note that while adding an index is a schema
+    #   change, it's completely safe to run the operation after the code has deployed.
+    is_dangerous = False
+
+    dependencies = [
+        ("sentry", "0346_add_flags_field_to_team_model"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="project",
+            name="flags",
+            field=bitfield.models.BitField(
+                (
+                    ("has_releases", "This Project has sent release data"),
+                    ("has_issue_alerts_targeting", "This Project has issue alerts targeting"),
+                    ("has_transactions", "This Project has sent transactions"),
+                    ("has_alert_filters", "This Project has filters"),
+                    ("has_sessions", "This Project has sessions"),
+                    ("has_profiles", "This Project has sent profiles"),
+                    ("has_replays", "This Project has sent replays"),
+                    (
+                        "spike_protection_error_currently_active",
+                        "spike_protection_error_currently_active",
+                    ),
+                    (
+                        "spike_protection_transaction_currently_active",
+                        "spike_protection_transaction_currently_active",
+                    ),
+                    (
+                        "spike_protection_attachment_currently_active",
+                        "spike_protection_attachment_currently_active",
+                    ),
+                    (
+                        "has_minified_stack_trace",
+                        "This Project has event with minified stack trace",
+                    ),
+                ),
+                default=10,
+                null=True,
+            ),
+        ),
+    ]

+ 1 - 0
src/sentry/models/project.py

@@ -152,6 +152,7 @@ class Project(Model, PendingDeletionMixin, SnowflakeIdMixin):
                 "spike_protection_attachment_currently_active",
                 "spike_protection_attachment_currently_active",
             ),
+            ("has_minified_stack_trace", "This Project has event with minified stack trace"),
         ),
         default=10,
         null=True,

+ 41 - 38
src/sentry/receivers/onboarding.py

@@ -20,6 +20,7 @@ from sentry.signals import (
     event_processed,
     first_event_pending,
     first_event_received,
+    first_event_with_minified_stack_trace_received,
     first_profile_received,
     first_replay_received,
     first_transaction_received,
@@ -31,8 +32,10 @@ from sentry.signals import (
     project_created,
     transaction_processed,
 )
+from sentry.utils.event import has_event_minified_stack_trace
 from sentry.utils.javascript import has_sourcemap
-from sentry.utils.safe import get_path
+
+logger = logging.getLogger("sentry")
 
 
 def try_mark_onboarding_complete(organization_id):
@@ -70,7 +73,7 @@ def record_new_project(project, user, **kwargs):
                 Organization.objects.get(id=project.organization_id).get_default_owner().id
             )
         except IndexError:
-            logging.getLogger("sentry").warning(
+            logger.warning(
                 "Cannot initiate onboarding for organization (%s) due to missing owners",
                 project.organization_id,
             )
@@ -138,41 +141,12 @@ def record_first_event(project, event, **kwargs):
     try:
         user: APIUser = Organization.objects.get(id=project.organization_id).get_default_owner()
     except IndexError:
-        logging.getLogger("sentry").warning(
+        logger.warning(
             "Cannot record first event for organization (%s) due to missing owners",
             project.organization_id,
         )
         return
 
-    url = None
-
-    # Check for the event url
-    for key, value in event.tags:
-        if key == "url":
-            url = value
-            break
-
-    # Check if an event contains a minified stack trace
-    has_minified_stack_trace = False
-
-    exception_values = get_path(event.data, "exception", "values", filter=True)
-
-    if exception_values:
-        for exception_value in exception_values:
-            if "raw_stacktrace" in exception_value:
-                has_minified_stack_trace = True
-                break
-
-    if has_minified_stack_trace:
-        analytics.record(
-            "first_event_with_minified_stack_trace_for_project.sent",
-            user_id=user.id,
-            organization_id=project.organization_id,
-            project_id=project.id,
-            platform=event.platform,
-            url=url,
-        )
-
     # this event fires once per project
     analytics.record(
         "first_event_for_project.sent",
@@ -180,8 +154,8 @@ def record_first_event(project, event, **kwargs):
         organization_id=project.organization_id,
         project_id=project.id,
         platform=event.platform,
-        url=url,
-        has_minified_stack_trace=has_minified_stack_trace,
+        url=dict(event.tags).get("url", None),
+        has_minified_stack_trace=has_event_minified_stack_trace(event),
     )
 
     if rows_affected or created:
@@ -333,7 +307,7 @@ def record_release_received(project, event, **kwargs):
         try:
             user: APIUser = Organization.objects.get(id=project.organization_id).get_default_owner()
         except IndexError:
-            logging.getLogger("sentry").warning(
+            logger.warning(
                 "Cannot record release received for organization (%s) due to missing owners",
                 project.organization_id,
             )
@@ -372,7 +346,7 @@ def record_user_context_received(project, event, **kwargs):
                     id=project.organization_id
                 ).get_default_owner()
             except IndexError:
-                logging.getLogger("sentry").warning(
+                logger.warning(
                     "Cannot record user context received for organization (%s) due to missing owners",
                     project.organization_id,
                 )
@@ -388,6 +362,35 @@ def record_user_context_received(project, event, **kwargs):
 
 
 event_processed.connect(record_user_context_received, weak=False)
+
+
+@first_event_with_minified_stack_trace_received.connect(weak=False)
+def record_event_with_first_minified_stack_trace_for_project(project, event, **kwargs):
+    try:
+        user: APIUser = Organization.objects.get(id=project.organization_id).get_default_owner()
+    except IndexError:
+        logger.warning(
+            "Cannot record first event for organization (%s) due to missing owners",
+            project.organization_id,
+        )
+        return
+
+    # First, only enter this logic if we've never seen a minified stack trace before
+    # This guarantees us that this analytics event will only be ever sent once.
+    if not project.flags.has_minified_stack_trace:
+        analytics.record(
+            "first_event_with_minified_stack_trace_for_project.sent",
+            user_id=user.id,
+            organization_id=project.organization_id,
+            project_id=project.id,
+            platform=event.platform,
+            url=dict(event.tags).get("url", None),
+        )
+
+        # We set the flag `has_minified_stack_trace` for the project
+        project.update(flags=F("flags").bitor(Project.flags.has_minified_stack_trace))
+
+
 transaction_processed.connect(record_user_context_received, weak=False)
 
 
@@ -406,7 +409,7 @@ def record_sourcemaps_received(project, event, **kwargs):
         try:
             user: APIUser = Organization.objects.get(id=project.organization_id).get_default_owner()
         except IndexError:
-            logging.getLogger("sentry").warning(
+            logger.warning(
                 "Cannot record sourcemaps received for organization (%s) due to missing owners",
                 project.organization_id,
             )
@@ -491,7 +494,7 @@ def record_issue_tracker_used(plugin, project, user, **kwargs):
         try:
             default_user_id = project.organization.get_default_owner().id
         except IndexError:
-            logging.getLogger("sentry").warning(
+            logger.warning(
                 "Cannot record issue tracker used for organization (%s) due to missing owners",
                 project.organization_id,
             )

+ 3 - 0
src/sentry/signals.py

@@ -116,6 +116,9 @@ project_created = BetterSignal(providing_args=["project", "user", "default_rules
 first_event_pending = BetterSignal(providing_args=["project", "user"])
 
 first_event_received = BetterSignal(providing_args=["project", "event"])
+# We use signal for consistency with other places but
+# would like to get rid of the signal since it doesn’t serve any purpose
+first_event_with_minified_stack_trace_received = BetterSignal(providing_args=["project", "event"])
 first_transaction_received = BetterSignal(providing_args=["project", "event"])
 first_profile_received = BetterSignal(providing_args=["project"])
 first_replay_received = BetterSignal(providing_args=["project"])

+ 13 - 0
src/sentry/utils/event.py

@@ -0,0 +1,13 @@
+from sentry.utils.safe import get_path
+
+
+# Check if an event contains a minified stack trace (source maps for javascript)
+def has_event_minified_stack_trace(event):
+    exception_values = get_path(event.data, "exception", "values", filter=True)
+
+    if exception_values:
+        for exception_value in exception_values:
+            if "stacktrace" in exception_value and "raw_stacktrace" in exception_value:
+                return True
+
+    return False

+ 141 - 0
tests/sentry/receivers/test_onboarding.py

@@ -1,3 +1,6 @@
+from unittest.mock import patch
+
+import pytest
 from django.utils import timezone
 
 from sentry.models import (
@@ -482,3 +485,141 @@ class OrganizationOnboardingTaskTest(TestCase):
             ).count()
             == 1
         )
+
+    @patch("sentry.analytics.record")
+    def test_first_event_without_minified_stack_trace_received(self, record_analytics):
+        """
+        Test that an analytics event is NOT recorded when
+        there no event with minified stack trace is received
+        """
+        now = timezone.now()
+        project = self.create_project(first_event=now)
+        project_created.send(project=project, user=self.user, sender=type(project))
+        data = load_data("javascript")
+        self.store_event(
+            data=data,
+            project_id=project.id,
+        )
+
+        with pytest.raises(AssertionError):
+            record_analytics.assert_called_with(
+                "first_event_with_minified_stack_trace_for_project.sent",
+                user_id=self.user.id,
+                organization_id=project.organization_id,
+                project_id=project.id,
+                platform="javascript",
+                url="http://localhost:3000",
+            )
+
+    @patch("sentry.analytics.record")
+    def test_first_event_with_minified_stack_trace_received(self, record_analytics):
+        """
+        Test that an analytics event is recorded when
+        a first event with minified stack trace is received
+        """
+        now = timezone.now()
+        project = self.create_project(first_event=now)
+        project_created.send(project=project, user=self.user, sender=type(project))
+        url = "http://localhost:3000"
+        data = load_data("javascript")
+        data["tags"] = [("url", url)]
+        data["exception"] = {
+            "values": [
+                {
+                    **data["exception"]["values"][0],
+                    "raw_stacktrace": {
+                        "frames": [
+                            {
+                                "function": "o",
+                                "filename": "/_static/dist/sentry/chunks/vendors-node_modules_emotion_is-prop-valid_node_modules_emotion_memoize_dist_memoize_browser_-4fe4bd.255071ceadabfb67483c.js",
+                                "abs_path": "https://s1.sentry-cdn.com/_static/dist/sentry/chunks/vendors-node_modules_emotion_is-prop-valid_node_modules_emotion_memoize_dist_memoize_browser_-4fe4bd.255071ceadabfb67483c.js",
+                                "lineno": 2,
+                                "colno": 37098,
+                                "pre_context": [
+                                    "/*! For license information please see vendors-node_modules_emotion_is-prop-valid_node_modules_emotion_memoize_dist_memoize_browser_-4fe4bd. {snip}"
+                                ],
+                                "context_line": "{snip} .apply(this,arguments);const i=o.map((e=>c(e,t)));return e.apply(this,i)}catch(e){throw l(),(0,i.$e)((n=>{n.addEventProcessor((e=>(t.mechani {snip}",
+                                "post_context": [
+                                    "//# sourceMappingURL=../sourcemaps/vendors-node_modules_emotion_is-prop-valid_node_modules_emotion_memoize_dist_memoize_browser_-4fe4bd.fe32 {snip}"
+                                ],
+                                "in_app": False,
+                            },
+                        ],
+                    },
+                }
+            ]
+        }
+
+        self.store_event(
+            project_id=project.id,
+            data=data,
+        )
+
+        record_analytics.assert_called_with(
+            "first_event_with_minified_stack_trace_for_project.sent",
+            user_id=self.user.id,
+            organization_id=project.organization_id,
+            project_id=project.id,
+            platform=data["platform"],
+            url=url,
+        )
+
+    @patch("sentry.analytics.record")
+    def test_analytic_triggered_only_once_if_multiple_events_with_minified_stack_trace_received(
+        self, record_analytics
+    ):
+        """
+        Test that an analytic event is triggered only once when
+        multiple events with minified stack trace are received
+        """
+        now = timezone.now()
+        project = self.create_project(first_event=now)
+        project_created.send(project=project, user=self.user, sender=type(project))
+        url = "http://localhost:3000"
+        event = load_data("javascript")
+        event["tags"] = [("url", url)]
+        event["exception"] = {
+            "values": [
+                {
+                    **event["exception"]["values"][0],
+                    "raw_stacktrace": {
+                        "frames": [
+                            {
+                                "function": "o",
+                                "filename": "/_static/dist/sentry/chunks/vendors-node_modules_emotion_is-prop-valid_node_modules_emotion_memoize_dist_memoize_browser_-4fe4bd.255071ceadabfb67483c.js",
+                                "abs_path": "https://s1.sentry-cdn.com/_static/dist/sentry/chunks/vendors-node_modules_emotion_is-prop-valid_node_modules_emotion_memoize_dist_memoize_browser_-4fe4bd.255071ceadabfb67483c.js",
+                                "lineno": 2,
+                                "colno": 37098,
+                                "pre_context": [
+                                    "/*! For license information please see vendors-node_modules_emotion_is-prop-valid_node_modules_emotion_memoize_dist_memoize_browser_-4fe4bd. {snip}"
+                                ],
+                                "context_line": "{snip} .apply(this,arguments);const i=o.map((e=>c(e,t)));return e.apply(this,i)}catch(e){throw l(),(0,i.$e)((n=>{n.addEventProcessor((e=>(t.mechani {snip}",
+                                "post_context": [
+                                    "//# sourceMappingURL=../sourcemaps/vendors-node_modules_emotion_is-prop-valid_node_modules_emotion_memoize_dist_memoize_browser_-4fe4bd.fe32 {snip}"
+                                ],
+                                "in_app": False,
+                            },
+                        ],
+                    },
+                }
+            ]
+        }
+
+        # Store first event
+        self.store_event(
+            project_id=project.id,
+            data=event,
+        )
+
+        # Store second event
+        self.store_event(
+            project_id=project.id,
+            data=event,
+        )
+
+        count = 0
+        for call_arg in record_analytics.call_args_list:
+            if "first_event_with_minified_stack_trace_for_project.sent" in call_arg[0]:
+                count += 1
+
+        assert count == 1