Browse Source

ref(releasehealth): Implement check_has_health_data on metrics backend [INGEST-249] (#28729)

 check_has_health_data implemented for metrics
Radu Woinaroski 3 years ago
parent
commit
3d787e22b0

+ 3 - 2
src/sentry/api/serializers/models/project.py

@@ -41,7 +41,6 @@ from sentry.notifications.helpers import (
 )
 from sentry.notifications.types import NotificationSettingOptionValues, NotificationSettingTypes
 from sentry.snuba import discover
-from sentry.snuba.sessions import check_has_health_data
 from sentry.utils import json
 from sentry.utils.compat import zip
 
@@ -336,7 +335,9 @@ class ProjectSerializer(Serializer):
         # call -> check_has_data with those ids and then update our `project_health_data_dict`
         # accordingly
         if check_has_health_data_ids:
-            projects_with_health_data = check_has_health_data(check_has_health_data_ids)
+            projects_with_health_data = releasehealth.check_has_health_data(
+                check_has_health_data_ids
+            )
             for project_id in projects_with_health_data:
                 project_health_data_dict[project_id]["hasHealthData"] = True
 

+ 2 - 2
src/sentry/models/release.py

@@ -1049,8 +1049,8 @@ class Release(Model):
         """Deletes a release if possible or raises a `UnsafeReleaseDeletion`
         exception.
         """
+        from sentry import releasehealth
         from sentry.models import Group, ReleaseFile
-        from sentry.snuba.sessions import check_has_health_data
 
         # we don't want to remove the first_release metadata on the Group, and
         # while people might want to kill a release (maybe to remove files),
@@ -1063,7 +1063,7 @@ class Release(Model):
         # We would need to be able to delete this data from snuba which we
         # can't do yet.
         project_ids = list(self.projects.values_list("id").all())
-        if check_has_health_data([(p[0], self.version) for p in project_ids]):
+        if releasehealth.check_has_health_data([(p[0], self.version) for p in project_ids]):
             raise UnsafeReleaseDeletion(ERR_RELEASE_HEALTH_DATA)
 
         # TODO(dcramer): this needs to happen in the queue as it could be a long

+ 31 - 8
src/sentry/releasehealth/base.py

@@ -1,5 +1,5 @@
 from datetime import datetime
-from typing import Mapping, Optional, Sequence, Tuple
+from typing import Mapping, Optional, Sequence, Set, Tuple, TypeVar
 
 from typing_extensions import TypedDict
 
@@ -10,17 +10,27 @@ OrganizationId = int
 ReleaseName = str
 EnvironmentName = str
 
+ProjectRelease = Tuple[ProjectId, ReleaseName]
 
-class ReleaseHealthBackend(Service):  # type: ignore
-    """Abstraction layer for all release health related queries"""
+ProjectOrRelease = TypeVar("ProjectOrRelease", ProjectId, ProjectRelease)
+
+
+class CurrentAndPreviousCrashFreeRate(TypedDict):
+    currentCrashFreeRate: Optional[float]
+    previousCrashFreeRate: Optional[float]
 
-    __all__ = ("get_current_and_previous_crash_free_rates", "get_release_adoption")
 
-    class CurrentAndPreviousCrashFreeRate(TypedDict):
-        currentCrashFreeRate: Optional[float]
-        previousCrashFreeRate: Optional[float]
+CurrentAndPreviousCrashFreeRates = Mapping[ProjectId, CurrentAndPreviousCrashFreeRate]
 
-    CurrentAndPreviousCrashFreeRates = Mapping[ProjectId, CurrentAndPreviousCrashFreeRate]
+
+class ReleaseHealthBackend(Service):  # type: ignore
+    """Abstraction layer for all release health related queries"""
+
+    __all__ = (
+        "get_current_and_previous_crash_free_rates",
+        "get_release_adoption",
+        "check_has_health_data",
+    )
 
     def get_current_and_previous_crash_free_rates(
         self,
@@ -100,3 +110,16 @@ class ReleaseHealthBackend(Service):  # type: ignore
         """
 
         raise NotImplementedError()
+
+    def check_has_health_data(
+        self, projects_list: Sequence[ProjectOrRelease]
+    ) -> Set[ProjectOrRelease]:
+        """
+        Function that returns a set of all project_ids or (project, release) if they have health data
+        within the last 90 days based on a list of projects or a list of project, release combinations
+        provided as an arg.
+        Inputs:
+            * projects_list: Contains either a list of project ids or a list of tuple (project_id,
+            release)
+        """
+        raise NotImplementedError()

+ 87 - 3
src/sentry/releasehealth/metrics.py

@@ -1,5 +1,5 @@
 from datetime import datetime, timedelta
-from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union
+from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Set, Tuple, Union
 
 import pytz
 from snuba_sdk import BooleanCondition, Column, Condition, Entity, Op, Query
@@ -8,9 +8,11 @@ from snuba_sdk.query import SelectableExpression
 
 from sentry.models.project import Project
 from sentry.releasehealth.base import (
+    CurrentAndPreviousCrashFreeRates,
     EnvironmentName,
     OrganizationId,
     ProjectId,
+    ProjectOrRelease,
     ReleaseHealthBackend,
     ReleaseName,
 )
@@ -38,6 +40,10 @@ def tag_value(org_id: int, name: str) -> int:
     return index  # type: ignore
 
 
+def try_get_tag_value(org_id: int, name: str) -> Optional[int]:
+    return indexer.resolve(org_id, UseCase.TAG_VALUE, name)  # type: ignore
+
+
 def reverse_tag_value(org_id: int, index: int) -> str:
     str_value = indexer.reverse_resolve(org_id, UseCase.TAG_VALUE, index)  # type: ignore
     assert str_value is not None
@@ -56,11 +62,11 @@ class MetricsReleaseHealthBackend(ReleaseHealthBackend):
         previous_end: datetime,
         rollup: int,
         org_id: Optional[int] = None,
-    ) -> ReleaseHealthBackend.CurrentAndPreviousCrashFreeRates:
+    ) -> CurrentAndPreviousCrashFreeRates:
         if org_id is None:
             org_id = self._get_org_id(project_ids)
 
-        projects_crash_free_rate_dict: ReleaseHealthBackend.CurrentAndPreviousCrashFreeRates = {
+        projects_crash_free_rate_dict: CurrentAndPreviousCrashFreeRates = {
             prj: {"currentCrashFreeRate": None, "previousCrashFreeRate": None}
             for prj in project_ids
         }
@@ -340,3 +346,81 @@ class MetricsReleaseHealthBackend(ReleaseHealthBackend):
             rv[project_id, release] = adoption
 
         return rv
+
+    def check_has_health_data(
+        self, projects_list: Sequence[ProjectOrRelease]
+    ) -> Set[ProjectOrRelease]:
+        now = datetime.now(pytz.utc)
+        start = now - timedelta(days=3)
+
+        projects_list = list(projects_list)
+
+        if len(projects_list) == 0:
+            return set()
+
+        includes_releases = isinstance(projects_list[0], tuple)
+
+        if includes_releases:
+            project_ids: List[ProjectId] = [x[0] for x in projects_list]  # type: ignore
+        else:
+            project_ids = projects_list  # type: ignore
+
+        org_id = self._get_org_id(project_ids)
+
+        where_clause = [
+            Condition(Column("org_id"), Op.EQ, org_id),
+            Condition(Column("project_id"), Op.IN, project_ids),
+            Condition(Column("metric_id"), Op.EQ, metric_id(org_id, "session")),
+            Condition(Column("timestamp"), Op.GTE, start),
+            Condition(Column("timestamp"), Op.LT, now),
+        ]
+
+        if includes_releases:
+            releases = [x[1] for x in projects_list]  # type: ignore
+            release_column_name = tag_key(org_id, "release")
+            releases_ids = [
+                release_id
+                for release_id in [try_get_tag_value(org_id, release) for release in releases]
+                if release_id is not None
+            ]
+            where_clause.append(Condition(Column(release_column_name), Op.IN, releases_ids))
+            column_names = ["project_id", release_column_name]
+
+            # def extract_raw_info(row: Mapping[str, Union[int, str]]) -> ProjectOrRelease:
+            #     return row["project_id"], reverse_tag_value(org_id, row.get(release_column_name))  # type: ignore
+
+        else:
+            column_names = ["project_id"]
+
+            # def extract_raw_info(row: Mapping[str, Union[int, str]]) -> ProjectOrRelease:
+            #     return row["project_id"]  # type: ignore
+
+        def extract_raw_info_func(
+            include_releases: bool,
+        ) -> Callable[[Mapping[str, Union[int, str]]], ProjectOrRelease]:
+            def f(row: Mapping[str, Union[int, str]]) -> ProjectOrRelease:
+                if include_releases:
+                    return row["project_id"], reverse_tag_value(org_id, row.get(release_column_name))  # type: ignore
+                else:
+                    return row["project_id"]  # type: ignore
+
+            return f
+
+        extract_raw_info = extract_raw_info_func(includes_releases)
+
+        query_cols = [Column(column_name) for column_name in column_names]
+        group_by_clause = query_cols
+
+        query = Query(
+            dataset=Dataset.Metrics.value,
+            match=Entity("metrics_counters"),
+            select=query_cols,
+            where=where_clause,
+            groupby=group_by_clause,
+        )
+
+        result = raw_snql_query(
+            query, referrer="releasehealth.metrics.check_has_health_data", use_cache=False
+        )
+
+        return {extract_raw_info(raw) for raw in result["data"]}

+ 14 - 3
src/sentry/releasehealth/sessions.py

@@ -1,14 +1,20 @@
 from datetime import datetime
-from typing import Optional, Sequence, Tuple
+from typing import Optional, Sequence, Set, Tuple
 
 from sentry.releasehealth.base import (
+    CurrentAndPreviousCrashFreeRates,
     EnvironmentName,
     OrganizationId,
     ProjectId,
+    ProjectOrRelease,
     ReleaseHealthBackend,
     ReleaseName,
 )
-from sentry.snuba.sessions import _get_release_adoption, get_current_and_previous_crash_free_rates
+from sentry.snuba.sessions import (
+    _check_has_health_data,
+    _get_release_adoption,
+    get_current_and_previous_crash_free_rates,
+)
 
 
 class SessionsReleaseHealthBackend(ReleaseHealthBackend):
@@ -23,7 +29,7 @@ class SessionsReleaseHealthBackend(ReleaseHealthBackend):
         previous_end: datetime,
         rollup: int,
         org_id: Optional[OrganizationId] = None,
-    ) -> ReleaseHealthBackend.CurrentAndPreviousCrashFreeRates:
+    ) -> CurrentAndPreviousCrashFreeRates:
         return get_current_and_previous_crash_free_rates(  # type: ignore
             project_ids=project_ids,
             current_start=current_start,
@@ -43,3 +49,8 @@ class SessionsReleaseHealthBackend(ReleaseHealthBackend):
         return _get_release_adoption(  # type: ignore
             project_releases=project_releases, environments=environments, now=now
         )
+
+    def check_has_health_data(
+        self, projects_list: Sequence[ProjectOrRelease]
+    ) -> Set[ProjectOrRelease]:
+        return _check_has_health_data(projects_list)  # type: ignore

+ 2 - 2
src/sentry/snuba/sessions.py

@@ -104,7 +104,7 @@ def get_oldest_health_data_for_releases(project_releases):
     return rv
 
 
-def check_has_health_data(projects_list):
+def _check_has_health_data(projects_list):
     """
     Function that returns a set of all project_ids or (project, release) if they have health data
     within the last 90 days based on a list of projects or a list of project, release combinations
@@ -460,7 +460,7 @@ def get_release_health_data_overview(
         # If we're already looking at a 90 day horizont we don't need to
         # fire another query, we can already assume there is no data.
         if summary_stats_period != "90d":
-            has_health_data = check_has_health_data(missing_releases)
+            has_health_data = releasehealth.check_has_health_data(missing_releases)
         else:
             has_health_data = ()
         for key in missing_releases:

+ 2 - 2
tests/sentry/api/serializers/test_project.py

@@ -452,7 +452,7 @@ class ProjectSummarySerializerTest(SnubaTestCase, TestCase):
         assert 24 == len(results[0]["transactionStats"])
         assert [1] == [v[1] for v in results[0]["transactionStats"] if v[1] > 0]
 
-    @mock.patch("sentry.api.serializers.models.project.check_has_health_data")
+    @mock.patch("sentry.api.serializers.models.project.releasehealth.check_has_health_data")
     @mock.patch(
         "sentry.api.serializers.models.project.releasehealth.get_current_and_previous_crash_free_rates"
     )
@@ -475,7 +475,7 @@ class ProjectSummarySerializerTest(SnubaTestCase, TestCase):
 
         check_has_health_data.assert_not_called()  # NOQA
 
-    @mock.patch("sentry.api.serializers.models.project.check_has_health_data")
+    @mock.patch("sentry.api.serializers.models.project.releasehealth.check_has_health_data")
     @mock.patch(
         "sentry.api.serializers.models.project.releasehealth.get_current_and_previous_crash_free_rates"
     )

+ 7 - 6
tests/snuba/sessions/test_sessions.py

@@ -9,7 +9,6 @@ from sentry.releasehealth.metrics import MetricsReleaseHealthBackend
 from sentry.releasehealth.sessions import SessionsReleaseHealthBackend
 from sentry.snuba.sessions import (
     _make_stats,
-    check_has_health_data,
     check_releases_have_health_data,
     get_adjacent_releases_based_on_adoption,
     get_oldest_health_data_for_releases,
@@ -147,7 +146,7 @@ class SnubaSessionsTest(TestCase, SnubaTestCase):
         }
 
     def test_check_has_health_data(self):
-        data = check_has_health_data(
+        data = self.backend.check_has_health_data(
             [(self.project.id, self.session_release), (self.project.id, "dummy-release")]
         )
         assert data == {(self.project.id, self.session_release)}
@@ -180,7 +179,7 @@ class SnubaSessionsTest(TestCase, SnubaTestCase):
                 }
             )
         )
-        data = check_has_health_data([self.project.id, project2.id])
+        data = self.backend.check_has_health_data([self.project.id, project2.id])
         assert data == {self.project.id}
 
     def test_check_has_health_data_without_releases_should_include_sessions_lte_90_days(self):
@@ -201,11 +200,11 @@ class SnubaSessionsTest(TestCase, SnubaTestCase):
                 {"project_id": project2.id, "org_id": project2.organization_id, "status": "exited"}
             )
         )
-        data = check_has_health_data([self.project.id, project2.id])
+        data = self.backend.check_has_health_data([self.project.id, project2.id])
         assert data == {self.project.id, project2.id}
 
     def test_check_has_health_data_does_not_crash_when_sending_projects_list_as_set(self):
-        data = check_has_health_data({self.project.id})
+        data = self.backend.check_has_health_data({self.project.id})
         assert data == {self.project.id}
 
     def test_get_project_releases_by_stability(self):
@@ -568,7 +567,9 @@ class SnubaSessionsTest(TestCase, SnubaTestCase):
 
 
 class SnubaSessionsTestMetrics(ReleaseHealthMetricsTestCase, SnubaSessionsTest):
-    """repeat tests with metrics"""
+    """
+    Same tests as in SnunbaSessionsTest but using the Metrics backendx
+    """
 
     pass