
ref: remove suspect resolutions feature (#68562)

The feature has been disabled since 2022.

anthony sottile, 11 months ago · commit 748d727fb5

+ 0 - 2
pyproject.toml

@@ -507,8 +507,6 @@ module = [
     "sentry.utils.sentry_apps.webhooks",
     "sentry.utils.services",
     "sentry.utils.snowflake",
-    "sentry.utils.suspect_resolutions.get_suspect_resolutions",
-    "sentry.utils.suspect_resolutions_releases.get_suspect_resolutions_releases",
     "sentry.web.forms.accounts",
     "sentry.web.frontend.account_identity",
     "sentry.web.frontend.auth_close",

+ 0 - 8
src/sentry/conf/server.py

@@ -406,8 +406,6 @@ INSTALLED_APPS: tuple[str, ...] = (
     "sentry.plugins.sentry_urls.apps.Config",
     "sentry.plugins.sentry_useragents.apps.Config",
     "sentry.plugins.sentry_webhooks.apps.Config",
-    "sentry.utils.suspect_resolutions.apps.Config",
-    "sentry.utils.suspect_resolutions_releases.apps.Config",
     "social_auth",
     "sudo",
     "sentry.eventstream",
@@ -792,8 +790,6 @@ CELERY_IMPORTS = (
     "sentry.dynamic_sampling.tasks.sliding_window_org",
     "sentry.dynamic_sampling.tasks.utils",
     "sentry.dynamic_sampling.tasks.custom_rule_notifications",
-    "sentry.utils.suspect_resolutions.get_suspect_resolutions",
-    "sentry.utils.suspect_resolutions_releases.get_suspect_resolutions_releases",
     "sentry.tasks.derive_code_mappings",
     "sentry.ingest.transaction_clusterer.tasks",
     "sentry.tasks.auto_enable_codecov",
@@ -925,8 +921,6 @@ CELERY_QUEUES_REGION = [
     Queue("unmerge", routing_key="unmerge"),
     Queue("update", routing_key="update"),
     Queue("profiles.process", routing_key="profiles.process"),
-    Queue("get_suspect_resolutions", routing_key="get_suspect_resolutions"),
-    Queue("get_suspect_resolutions_releases", routing_key="get_suspect_resolutions_releases"),
     Queue("replays.ingest_replay", routing_key="replays.ingest_replay"),
     Queue("replays.delete_replay", routing_key="replays.delete_replay"),
     Queue("counters-0", routing_key="counters-0"),
@@ -2026,8 +2020,6 @@ SENTRY_FEATURES: dict[str, bool | None] = {
     "projects:span-metrics-extraction-all-modules": False,
     "projects:span-metrics-extraction-resource": False,
     "projects:discard-transaction": False,
-    # Enable suspect resolutions feature
-    "projects:suspect-resolutions": False,
     # Controls whether or not the relocation endpoints can be used.
     "relocation:enabled": False,
     # NOTE: Don't add feature defaults down here! Please add them in their associated

+ 0 - 1
src/sentry/features/temporary.py

@@ -289,7 +289,6 @@ def register_temporary_features(manager: FeatureManager):
     manager.add("projects:span-metrics-extraction-all-modules", ProjectFeature, FeatureHandlerStrategy.INTERNAL)
     manager.add("projects:span-metrics-extraction-ga-modules", ProjectFeature, FeatureHandlerStrategy.INTERNAL)
     manager.add("projects:span-metrics-extraction-resource", ProjectFeature, FeatureHandlerStrategy.INTERNAL)
-    manager.add("projects:suspect-resolutions", ProjectFeature, FeatureHandlerStrategy.REMOTE)
 
     # Project plugin features
     manager.add("projects:plugins", ProjectPluginFeature, FeatureHandlerStrategy.INTERNAL)

+ 0 - 5
src/sentry/utils/suspect_resolutions/__init__.py

@@ -1,5 +0,0 @@
-from .analytics import *  # NOQA
-
-# make sure to increment this when making changes to anything within the 'suspect_resolutions' directory
-# keeps track of changes to how we process suspect commits, so we can filter out analytics events by the algo version
-ALGO_VERSION = "0.0.5"

+ 0 - 24
src/sentry/utils/suspect_resolutions/analytics.py

@@ -1,24 +0,0 @@
-from sentry import analytics
-
-
-class SuspectResolutionEvaluation(analytics.Event):
-    type = "suspect_resolution.evaluation"
-
-    attributes = (
-        analytics.Attribute("algo_version"),
-        analytics.Attribute("resolved_group_id"),
-        analytics.Attribute("candidate_group_id"),
-        analytics.Attribute("resolved_group_resolution_type"),
-        analytics.Attribute("pearson_r_coefficient"),
-        analytics.Attribute("pearson_r_start_time"),
-        analytics.Attribute("pearson_r_end_time"),
-        analytics.Attribute("pearson_r_resolution_time"),
-        analytics.Attribute("is_commit_correlated"),
-        analytics.Attribute("resolved_issue_release_ids"),
-        analytics.Attribute("candidate_issue_release_ids"),
-        analytics.Attribute("resolved_issue_total_events"),
-        analytics.Attribute("candidate_issue_total_events"),
-    )
-
-
-analytics.register(SuspectResolutionEvaluation)

+ 0 - 8
src/sentry/utils/suspect_resolutions/apps.py

@@ -1,8 +0,0 @@
-from django.apps import AppConfig
-
-
-class Config(AppConfig):
-    name = "sentry.utils.suspect_resolutions"
-
-    def ready(self) -> None:
-        from .get_suspect_resolutions import get_suspect_resolutions  # NOQA

+ 0 - 76
src/sentry/utils/suspect_resolutions/commit_correlation.py

@@ -1,76 +0,0 @@
-from __future__ import annotations
-
-from collections.abc import Sequence
-from dataclasses import dataclass
-from datetime import datetime, timedelta
-
-from sentry.models.commitfilechange import CommitFileChange
-from sentry.models.group import Group
-from sentry.models.grouprelease import GroupRelease
-from sentry.models.release import Release
-from sentry.models.releasecommit import ReleaseCommit
-
-
-@dataclass
-class CommitCorrelatedResult:
-    is_correlated: bool
-    resolved_issue_release_ids: Sequence[int]
-    candidate_issue_release_ids: Sequence[int]
-
-
-@dataclass
-class ReleaseCommitFileChanges:
-    release_ids: Sequence[int]
-    files_changed: set[str]
-
-
-def is_issue_commit_correlated(
-    resolved_issue: int, candidate_issue: int, project: int
-) -> CommitCorrelatedResult:
-    resolved_issue_time = Group.objects.filter(id=resolved_issue).first().resolved_at
-    resolved_filechanges = get_files_changed_in_releases(
-        resolved_issue_time, resolved_issue, project
-    )
-    candidate_filechanges = get_files_changed_in_releases(
-        resolved_issue_time, candidate_issue, project
-    )
-
-    if (
-        len(resolved_filechanges.files_changed) == 0
-        or len(candidate_filechanges.files_changed) == 0
-    ):
-        return CommitCorrelatedResult(False, [], [])
-
-    return CommitCorrelatedResult(
-        not resolved_filechanges.files_changed.isdisjoint(candidate_filechanges.files_changed),
-        resolved_filechanges.release_ids,
-        candidate_filechanges.release_ids,
-    )
-
-
-def get_files_changed_in_releases(
-    resolved_issue_time: datetime, issue_id: int, project_id: int
-) -> ReleaseCommitFileChanges:
-    releases = list(
-        Release.objects.filter(
-            id__in=GroupRelease.objects.filter(
-                group_id=issue_id,
-                project_id=project_id,
-            ).values_list("release_id", flat=True),
-            date_added__gte=(resolved_issue_time - timedelta(hours=5)),
-        )
-    )
-    if len(releases) == 0:
-        return ReleaseCommitFileChanges([], set())
-
-    files_changed_in_releases = set(
-        CommitFileChange.objects.filter(
-            commit_id__in=ReleaseCommit.objects.filter(release__in=releases).values_list(
-                "commit_id", flat=True
-            )
-        )
-        .values_list("filename", flat=True)
-        .distinct()
-    )
-
-    return ReleaseCommitFileChanges([release.id for release in releases], files_changed_in_releases)
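
For context, the removed commit-correlation check boiled down to a set-overlap test on the filenames changed in each issue's recent releases. A minimal standalone sketch with hypothetical file sets (illustration only, not Sentry code):

```python
# Two issues counted as "commit correlated" when the files changed in their
# recent releases shared at least one filename.
resolved_files = {"src/api/views.py", "src/api/serializers.py"}
candidate_files = {"src/api/views.py", "src/worker/tasks.py"}

# isdisjoint() is True when the sets share nothing, so correlation is its negation.
is_commit_correlated = not resolved_files.isdisjoint(candidate_files)
print(is_commit_correlated)  # True: both touched src/api/views.py
```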

+ 0 - 108
src/sentry/utils/suspect_resolutions/get_suspect_resolutions.py

@@ -1,108 +0,0 @@
-from collections.abc import Sequence
-from datetime import timedelta
-
-from django.utils import timezone
-
-from sentry import features
-from sentry.models.activity import Activity
-from sentry.models.group import Group, GroupStatus
-from sentry.signals import issue_resolved
-from sentry.silo import SiloMode
-from sentry.tasks.base import instrumented_task
-from sentry.types.activity import ActivityType
-from sentry.utils.suspect_resolutions import ALGO_VERSION, analytics
-from sentry.utils.suspect_resolutions.commit_correlation import is_issue_commit_correlated
-from sentry.utils.suspect_resolutions.metric_correlation import is_issue_error_rate_correlated
-
-
-@issue_resolved.connect(weak=False)
-def record_suspect_resolutions(
-    organization_id, project, group, user, resolution_type, **kwargs
-) -> None:
-    if features.has("projects:suspect-resolutions", project):
-        if (
-            resolution_type == "in_next_release"
-            or resolution_type == "in_release"
-            or resolution_type == "with_commit"
-            or resolution_type == "in_commit"
-        ):
-            get_suspect_resolutions.apply_async(
-                kwargs={"resolved_issue_id": group.id},
-                eta=timezone.now() + timedelta(hours=1),
-                expires=timezone.now() + timedelta(hours=1, minutes=30),
-            )
-        else:
-            get_suspect_resolutions.delay(group.id)
-
-
-@instrumented_task(
-    name="sentry.tasks.get_suspect_resolutions",
-    queue="get_suspect_resolutions",
-    silo_mode=SiloMode.REGION,
-)
-def get_suspect_resolutions(resolved_issue_id: int, **kwargs) -> Sequence[int]:
-    resolved_issue = Group.objects.get(id=resolved_issue_id)
-    latest_resolved_activity = (
-        Activity.objects.filter(
-            group=resolved_issue,
-            type__in=(
-                ActivityType.SET_RESOLVED.value,
-                ActivityType.SET_RESOLVED_IN_COMMIT.value,
-                ActivityType.SET_RESOLVED_IN_PULL_REQUEST.value,
-                ActivityType.SET_RESOLVED_IN_RELEASE.value,
-            ),
-        )
-        .order_by("-datetime")
-        .values_list("type", flat=True)
-        .first()
-    )
-    latest_resolved_activity_type = (
-        ActivityType(latest_resolved_activity).name if latest_resolved_activity else None
-    )
-
-    if resolved_issue.status != GroupStatus.RESOLVED or latest_resolved_activity is None:
-        return []
-
-    suspect_issue_candidates = list(
-        Group.objects.filter(
-            status=GroupStatus.UNRESOLVED,
-            project=resolved_issue.project,
-            last_seen__lte=(resolved_issue.last_seen + timedelta(hours=1)),
-            last_seen__gte=(resolved_issue.last_seen - timedelta(hours=1)),
-        ).exclude(id=resolved_issue.id)[:100]
-    )
-
-    result = is_issue_error_rate_correlated(resolved_issue, suspect_issue_candidates)
-
-    if result is None:
-        return []
-
-    correlated_issue_ids = []
-    for metric_correlation_result in result.candidate_metric_correlations:
-        commit_correlation = is_issue_commit_correlated(
-            resolved_issue.id,
-            metric_correlation_result.candidate_suspect_resolution_id,
-            resolved_issue.project.id,
-        )
-
-        if metric_correlation_result.is_correlated and commit_correlation.is_correlated:
-            correlated_issue_ids.append(metric_correlation_result.candidate_suspect_resolution_id)
-
-        analytics.record(
-            "suspect_resolution.evaluation",
-            algo_version=ALGO_VERSION,
-            resolved_group_id=resolved_issue.id,
-            candidate_group_id=metric_correlation_result.candidate_suspect_resolution_id,
-            resolved_group_resolution_type=latest_resolved_activity_type,
-            pearson_r_coefficient=metric_correlation_result.coefficient,
-            pearson_r_start_time=result.correlation_start_time,
-            pearson_r_end_time=result.correlation_end_time,
-            pearson_r_resolution_time=result.issue_resolved_time,
-            is_commit_correlated=commit_correlation.is_correlated,
-            resolved_issue_release_ids=commit_correlation.resolved_issue_release_ids,
-            candidate_issue_release_ids=commit_correlation.candidate_issue_release_ids,
-            resolved_issue_total_events=metric_correlation_result.resolved_issue_total_events,
-            candidate_issue_total_events=metric_correlation_result.candidate_issue_total_events,
-        )
-
-    return correlated_issue_ids

+ 0 - 91
src/sentry/utils/suspect_resolutions/metric_correlation.py

@@ -1,91 +0,0 @@
-from collections.abc import Sequence
-from dataclasses import dataclass
-from datetime import datetime, timedelta
-
-from sentry import tsdb
-from sentry.models.group import Group
-from sentry.tsdb.base import TSDBModel
-
-
-@dataclass
-class CandidateMetricCorrResult:
-    candidate_suspect_resolution_id: int
-    is_correlated: bool
-    coefficient: float
-    candidate_issue_total_events: int
-    resolved_issue_total_events: int
-
-
-@dataclass
-class IssueReleaseMetricCorrResult:
-    candidate_metric_correlations: Sequence[CandidateMetricCorrResult]
-    issue_resolved_time: datetime
-    correlation_start_time: datetime
-    correlation_end_time: datetime
-
-
-def is_issue_error_rate_correlated(
-    resolved_issue: Group, candidate_suspect_resolutions: list[Group]
-) -> IssueReleaseMetricCorrResult | None:
-    if (
-        not resolved_issue
-        or not resolved_issue.resolved_at
-        or len(candidate_suspect_resolutions) == 0
-    ):
-        return None
-
-    resolution_time = resolved_issue.resolved_at
-
-    start_time = resolution_time - timedelta(hours=5)
-    end_time = resolution_time + timedelta(hours=1)
-
-    data = tsdb.backend.get_range(
-        model=TSDBModel.group,
-        keys=[resolved_issue.id] + [csr.id for csr in candidate_suspect_resolutions],
-        rollup=600,
-        start=start_time,
-        end=end_time,
-        tenant_ids={"organization_id": resolved_issue.project.organization_id},
-    )
-
-    x = [events for _, events in data[resolved_issue.id]]
-    y = {csr.id: [events for _, events in data[csr.id]] for csr in candidate_suspect_resolutions}
-
-    resolved_issue_total_events = sum(x)
-    candidate_issue_total_events = {csr: sum(events) for csr, events in y.items()}
-
-    coefficients = {csr_id: calculate_pearson_correlation_coefficient(x, y[csr_id]) for csr_id in y}
-
-    results = [
-        CandidateMetricCorrResult(
-            candidate_suspect_resolution_id=csr_id,
-            is_correlated=coefficient > 0.4,
-            coefficient=coefficient,
-            candidate_issue_total_events=candidate_issue_total_events[csr_id],
-            resolved_issue_total_events=resolved_issue_total_events,
-        )
-        for (csr_id, coefficient) in coefficients.items()
-    ]
-
-    return IssueReleaseMetricCorrResult(results, resolution_time, start_time, end_time)
-
-
-def calculate_pearson_correlation_coefficient(x: Sequence[int], y: Sequence[int]) -> float:
-    # source: https://inside-machinelearning.com/en/pearson-formula-in-python-linear-correlation-coefficient/
-    if len(x) == 0 or len(y) == 0:
-        return 0.0
-
-    mean_x = sum(x) / len(x)
-    mean_y = sum(y) / len(y)
-
-    cov = sum((a - mean_x) * (b - mean_y) for (a, b) in zip(x, y)) / len(x)
-
-    st_dev_x = (sum((a - mean_x) ** 2 for a in x) / len(x)) ** 0.5
-    st_dev_y = (sum((b - mean_y) ** 2 for b in y) / len(y)) ** 0.5
-
-    st_dev_x_y = st_dev_x * st_dev_y
-
-    if st_dev_x_y == 0 or st_dev_x_y == 0.0:
-        return 0.0
-
-    return float(cov / st_dev_x_y)
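
The removed metric-correlation module computed Pearson's r by hand over per-interval tsdb event counts and treated a candidate as correlated when r > 0.4. A minimal worked sketch with made-up counts, using the standard library instead of the removed helper (illustration only):

```python
from statistics import mean, pstdev

def pearson_r(x, y):
    # population covariance divided by the product of population standard deviations
    mx, my = mean(x), mean(y)
    cov = sum((a - mx) * (b - my) for a, b in zip(x, y)) / len(x)
    denom = pstdev(x) * pstdev(y)
    return cov / denom if denom else 0.0

resolved_counts = [40, 35, 30, 5, 2, 1]   # event counts drop after the resolution
candidate_counts = [38, 33, 29, 6, 3, 2]  # a candidate issue with a similar drop

r = pearson_r(resolved_counts, candidate_counts)
print(r > 0.4)  # True: this candidate would be treated as metric-correlated
```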

+ 0 - 35
src/sentry/utils/suspect_resolutions/resolved_in_active_release.py

@@ -1,35 +0,0 @@
-from datetime import timedelta
-
-from django.utils import timezone
-
-from sentry.models.deploy import Deploy
-from sentry.models.group import Group, GroupStatus
-from sentry.models.release import Release
-
-
-def is_resolved_issue_within_active_release(issue: Group) -> bool:
-    if issue is None or issue.status != GroupStatus.RESOLVED or issue.get_last_release() is None:
-        return False
-
-    latest_release_version_issue = issue.get_last_release()
-
-    latest_release_issue = Release.objects.filter(
-        version=latest_release_version_issue, organization_id=issue.project.organization.id
-    )
-
-    if len(latest_release_issue) == 0:
-        return False
-
-    latest_deploy_release: Deploy = (
-        Deploy.objects.filter(release_id=latest_release_issue.first().id)
-        .order_by("-date_finished")
-        .first()
-        or Deploy.objects.filter(id=latest_release_issue.first().last_deploy_id).first()
-    )
-
-    if not latest_deploy_release:
-        return False
-
-    now_minus_1_hour = timezone.now() - timedelta(hours=1.0)
-
-    return bool(now_minus_1_hour <= latest_deploy_release.date_finished <= timezone.now())
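
The removed resolved_in_active_release helper reduced to a one-hour recency window on the issue's latest deploy. A minimal sketch of that window check with plain datetimes (hypothetical helper, no Django or Sentry models):

```python
from datetime import datetime, timedelta, timezone

def deploy_is_active(deploy_finished_at: datetime, now: datetime | None = None) -> bool:
    # The deploy must have finished within the last hour for the resolution
    # to count as happening inside an "active" release.
    now = now or datetime.now(timezone.utc)
    return now - timedelta(hours=1) <= deploy_finished_at <= now

recent_deploy = datetime.now(timezone.utc) - timedelta(minutes=20)
print(deploy_is_active(recent_deploy))  # True: finished 20 minutes ago
```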

Some files were not shown because too many files changed in this diff