Browse Source

ref(metrics): Release health service INGEST-376 (#28598)

Provide a common interface for release health queries, so we can easily
switch between the "sessions" backend and the metrics backend.

Note: The service class in this PR only features a single release-health-related
function. More functions will be ported in future PRs.

https://getsentry.atlassian.net/browse/INGEST-376
Joris Bayer 3 years ago
parent
commit
31e6dd695d

+ 1 - 0
mypy.ini

@@ -44,6 +44,7 @@ files = src/sentry/api/bases/external_actor.py,
         src/sentry/models/projectoption.py,
         src/sentry/models/useroption.py,
         src/sentry/notifications/**/*.py,
+        src/sentry/releasehealth/**/*.py,
         src/sentry/sentry_metrics/**/*.py,
         src/sentry/shared_integrations/constants.py,
         src/sentry/snuba/outcomes.py,

+ 3 - 3
src/sentry/api/serializers/models/project.py

@@ -8,7 +8,7 @@ from django.db.models import prefetch_related_objects
 from django.db.models.aggregates import Count
 from django.utils import timezone
 
-from sentry import features, options, projectoptions, roles
+from sentry import features, options, projectoptions, releasehealth, roles
 from sentry.api.serializers import Serializer, register, serialize
 from sentry.api.serializers.models.plugin import PluginSerializer
 from sentry.api.serializers.models.team import get_org_roles, get_team_memberships
@@ -41,7 +41,7 @@ from sentry.notifications.helpers import (
 )
 from sentry.notifications.types import NotificationSettingOptionValues, NotificationSettingTypes
 from sentry.snuba import discover
-from sentry.snuba.sessions import check_has_health_data, get_current_and_previous_crash_free_rates
+from sentry.snuba.sessions import check_has_health_data
 from sentry.utils import json
 from sentry.utils.compat import zip
 
@@ -308,7 +308,7 @@ class ProjectSerializer(Serializer):
         current_interval_start = now - (segments * interval)
         previous_interval_start = now - (2 * segments * interval)
 
-        project_health_data_dict = get_current_and_previous_crash_free_rates(
+        project_health_data_dict = releasehealth.get_current_and_previous_crash_free_rates(
             project_ids=project_ids,
             current_start=current_interval_start,
             current_end=now,

+ 4 - 0
src/sentry/conf/server.py

@@ -1359,6 +1359,10 @@ SENTRY_METRICS_SKIP_INTERNAL_PREFIXES = []  # Order this by most frequent prefix
 SENTRY_METRICS_INDEXER = "sentry.sentry_metrics.indexer.mock.MockIndexer"
 SENTRY_METRICS_INDEXER_OPTIONS = {}
 
+# Release Health
+SENTRY_RELEASE_HEALTH = "sentry.releasehealth.sessions.SessionsReleaseHealthBackend"
+SENTRY_RELEASE_HEALTH_OPTIONS = {}
+
 # Render charts on the backend. This uses the Chartcuterie external service.
 SENTRY_CHART_RENDERER = "sentry.charts.chartcuterie.Chartcuterie"
 SENTRY_CHART_RENDERER_OPTIONS = {}

+ 12 - 0
src/sentry/releasehealth/__init__.py

@@ -0,0 +1,12 @@
from django.conf import settings

from sentry.utils.services import LazyServiceWrapper

from .base import ReleaseHealthBackend

# Service delegator: lazily instantiates the concrete release health backend
# named by the SENTRY_RELEASE_HEALTH setting (e.g. the sessions-based or
# metrics-based implementation), configured with SENTRY_RELEASE_HEALTH_OPTIONS.
backend = LazyServiceWrapper(
    ReleaseHealthBackend,
    settings.SENTRY_RELEASE_HEALTH,
    settings.SENTRY_RELEASE_HEALTH_OPTIONS,
)
# Re-export the backend's public methods (its __all__) at module level, so
# callers can use e.g. `releasehealth.get_current_and_previous_crash_free_rates(...)`.
backend.expose(locals())

+ 57 - 0
src/sentry/releasehealth/base.py

@@ -0,0 +1,57 @@
+from datetime import datetime
+from typing import Dict, Optional, Sequence
+
+from typing_extensions import TypedDict
+
+from sentry.utils.services import Service
+
+
class ReleaseHealthBackend(Service):  # type: ignore
    """Abstraction layer for all release health related queries"""

    # Public interface exposed through the LazyServiceWrapper delegator.
    __all__ = ("get_current_and_previous_crash_free_rates",)

    class CurrentAndPreviousCrashFreeRate(TypedDict):
        # Crash-free percentage for the current interval; None when the
        # backend has no data for the project in that interval.
        currentCrashFreeRate: Optional[float]
        # Crash-free percentage for the preceding interval; None when absent.
        previousCrashFreeRate: Optional[float]

    # Mapping of project_id -> current/previous crash-free rates.
    CurrentAndPreviousCrashFreeRates = Dict[int, CurrentAndPreviousCrashFreeRate]

    def get_current_and_previous_crash_free_rates(
        self,
        project_ids: Sequence[int],
        current_start: datetime,
        current_end: datetime,
        previous_start: datetime,
        previous_end: datetime,
        rollup: int,
        org_id: Optional[int] = None,
    ) -> CurrentAndPreviousCrashFreeRates:
        """
        Function that returns `currentCrashFreeRate` and the `previousCrashFreeRate` of projects
        based on the inputs provided
        Inputs:
            * project_ids
            * current_start: start interval of currentCrashFreeRate
            * current_end: end interval of currentCrashFreeRate
            * previous_start: start interval of previousCrashFreeRate
            * previous_end: end interval of previousCrashFreeRate
            * rollup: query granularity in seconds
            * org_id: optional organization id of the projects; implementations
              may use it or derive it from the projects themselves
        Returns:
            A dictionary of project_id as key and as value the `currentCrashFreeRate` and the
            `previousCrashFreeRate`

            As an example:
            {
                1: {
                    "currentCrashFreeRate": 100,
                    "previousCrashFreeRate": 66.66666666666667
                },
                2: {
                    "currentCrashFreeRate": 50.0,
                    "previousCrashFreeRate": None
                },
                ...
            }
        """
        raise NotImplementedError()

+ 152 - 0
src/sentry/releasehealth/metrics.py

@@ -0,0 +1,152 @@
+from datetime import datetime
+from typing import Dict, Optional, Sequence, Set
+
+from snuba_sdk import Column, Condition, Entity, Op, Query
+from snuba_sdk.expressions import Granularity
+
+from sentry.models.project import Project
+from sentry.releasehealth.base import ReleaseHealthBackend
+from sentry.sentry_metrics import indexer
+from sentry.sentry_metrics.indexer.base import UseCase
+from sentry.snuba.dataset import Dataset
+from sentry.utils.snuba import raw_snql_query
+
+
def metric_id(org_id: int, name: str) -> int:
    """Resolve a metric name (e.g. "session") to its integer index.

    Raises AssertionError if the name has not been indexed for this org.
    """
    # Fix: metric names must be resolved with UseCase.METRIC, not
    # UseCase.TAG_KEY — consistent with how SessionMetricsTestCase's
    # metric_id helper resolves the same names when storing data.
    index = indexer.resolve(org_id, UseCase.METRIC, name)  # type: ignore
    assert index is not None  # TODO: assert too strong?
    return index  # type: ignore
+
+
def tag_key(org_id: int, name: str) -> str:
    """Resolve a tag key name to its snuba column alias, e.g. ``tags[7]``."""
    resolved = indexer.resolve(org_id, UseCase.TAG_KEY, name)  # type: ignore
    assert resolved is not None
    return "tags[{}]".format(resolved)
+
+
def tag_value(org_id: int, name: str) -> int:
    """Resolve a tag value string to its integer index."""
    resolved = indexer.resolve(org_id, UseCase.TAG_VALUE, name)  # type: ignore
    assert resolved is not None
    return resolved  # type: ignore
+
+
def reverse_tag_value(org_id: int, index: int) -> str:
    """Look up the original string for an indexed tag value."""
    resolved = indexer.reverse_resolve(org_id, UseCase.TAG_VALUE, index)  # type: ignore
    assert resolved is not None
    return resolved  # type: ignore
+
+
class MetricsReleaseHealthBackend(ReleaseHealthBackend):
    """Gets release health results from the metrics dataset"""

    def get_current_and_previous_crash_free_rates(
        self,
        project_ids: Sequence[int],
        current_start: datetime,
        current_end: datetime,
        previous_start: datetime,
        previous_end: datetime,
        rollup: int,
        org_id: Optional[int] = None,
    ) -> ReleaseHealthBackend.CurrentAndPreviousCrashFreeRates:
        """Fetch current and previous crash free rates per project.

        See ReleaseHealthBackend for the full contract. If ``org_id`` is not
        given, it is derived from the projects; all projects must belong to
        the same organization.
        """
        if org_id is None:
            org_id = self._get_org_id(project_ids)

        # Seed every requested project with None so projects without any
        # session data in either interval still appear in the result.
        projects_crash_free_rate_dict: ReleaseHealthBackend.CurrentAndPreviousCrashFreeRates = {
            prj: {"currentCrashFreeRate": None, "previousCrashFreeRate": None}
            for prj in project_ids
        }

        # The previous and current intervals run the exact same query; only
        # the time bounds and the result key differ.
        intervals = (
            ("previousCrashFreeRate", previous_start, previous_end),
            ("currentCrashFreeRate", current_start, current_end),
        )
        for result_key, start, end in intervals:
            interval_data = self._get_crash_free_rate_data(
                org_id,
                project_ids,
                start,
                end,
                rollup,
            )
            for project_id, project_data in interval_data.items():
                projects_crash_free_rate_dict[project_id][
                    result_key
                ] = self._compute_crash_free_rate(project_data)

        return projects_crash_free_rate_dict

    @staticmethod
    def _get_org_id(project_ids: Sequence[int]) -> int:
        """Return the single organization id shared by all given projects.

        Raises ValueError when the projects span multiple organizations (or
        when the project list resolves to none).
        """
        projects = Project.objects.get_many_from_cache(project_ids)
        org_ids: Set[int] = {project.organization_id for project in projects}
        if len(org_ids) != 1:
            raise ValueError("Expected projects to be from the same organization")

        return org_ids.pop()

    @staticmethod
    def _get_crash_free_rate_data(
        org_id: int,
        project_ids: Sequence[int],
        start: datetime,
        end: datetime,
        rollup: int,
    ) -> Dict[int, Dict[str, float]]:
        """Query per-project session counts grouped by session.status.

        Returns a mapping {project_id: {status_string: count}}, e.g.
        {1: {"init": 100.0, "crashed": 3.0}}.
        """

        data: Dict[int, Dict[str, float]] = {}

        session_status = tag_key(org_id, "session.status")

        count_query = Query(
            dataset=Dataset.Metrics.value,
            match=Entity("metrics_counters"),
            select=[Column("value")],
            where=[
                Condition(Column("org_id"), Op.EQ, org_id),
                Condition(Column("project_id"), Op.IN, project_ids),
                Condition(Column("metric_id"), Op.EQ, metric_id(org_id, "session")),
                Condition(Column("timestamp"), Op.GTE, start),
                Condition(Column("timestamp"), Op.LT, end),
            ],
            groupby=[
                Column("project_id"),
                Column(session_status),
            ],
            granularity=Granularity(rollup),
        )

        count_data = raw_snql_query(
            count_query, referrer="releasehealth.metrics.get_crash_free_data", use_cache=False
        )["data"]

        for row in count_data:
            project_data = data.setdefault(row["project_id"], {})
            # `status` (renamed from `tag_value`, which shadowed the
            # module-level `tag_value` helper) is the human-readable
            # session.status string, e.g. "init" or "crashed".
            status = reverse_tag_value(org_id, row[session_status])
            project_data[status] = row["value"]

        return data

    @staticmethod
    def _compute_crash_free_rate(data: Dict[str, float]) -> Optional[float]:
        """Compute the crash-free percentage from per-status session counts.

        Returns None when no sessions were started ("init") in the interval.
        """
        total_session_count = data.get("init", 0)
        crash_count = data.get("crashed", 0)

        if total_session_count == 0:
            return None

        crash_free_rate = 1.0 - (crash_count / total_session_count)

        # Clamp at 0 in case crash count is larger than the total session
        # count for some reason, then convert to a percentage.
        crash_free_rate = 100 * max(0.0, crash_free_rate)

        return crash_free_rate

+ 28 - 0
src/sentry/releasehealth/sessions.py

@@ -0,0 +1,28 @@
+from datetime import datetime
+from typing import Optional, Sequence
+
+from sentry.releasehealth.base import ReleaseHealthBackend
+from sentry.snuba.sessions import get_current_and_previous_crash_free_rates
+
+
class SessionsReleaseHealthBackend(ReleaseHealthBackend):
    """Gets release health results from the session dataset"""

    def get_current_and_previous_crash_free_rates(
        self,
        project_ids: Sequence[int],
        current_start: datetime,
        current_end: datetime,
        previous_start: datetime,
        previous_end: datetime,
        rollup: int,
        org_id: Optional[int] = None,
    ) -> ReleaseHealthBackend.CurrentAndPreviousCrashFreeRates:
        """Delegate to the sessions-dataset query in ``sentry.snuba.sessions``.

        ``org_id`` is accepted for interface compatibility with the base
        class but is not needed by the sessions query, so it is ignored.
        """
        result = get_current_and_previous_crash_free_rates(
            project_ids=project_ids,
            current_start=current_start,
            current_end=current_end,
            previous_start=previous_start,
            previous_end=previous_end,
            rollup=rollup,
        )
        return result  # type: ignore

+ 1 - 0
src/sentry/sentry_metrics/indexer/mock.py

@@ -17,6 +17,7 @@ _STRINGS = {
     "session": 9,
     "staging": 10,
     "user": 11,
+    "init": 12,
 }
 _REVERSE = {v: k for k, v in _STRINGS.items()}
 

+ 1 - 0
src/sentry/snuba/dataset.py

@@ -9,3 +9,4 @@ class Dataset(Enum):
     Outcomes = "outcomes"
     OutcomesRaw = "outcomes_raw"
     Sessions = "sessions"
+    Metrics = "metrics"

+ 66 - 1
src/sentry/testutils/cases.py

@@ -78,6 +78,8 @@ from sentry.models import (
 )
 from sentry.plugins.base import plugins
 from sentry.rules import EventState
+from sentry.sentry_metrics import indexer
+from sentry.sentry_metrics.indexer.base import UseCase
 from sentry.tagstore.snuba import SnubaTagStorage
 from sentry.testutils.helpers.datetime import iso_format
 from sentry.utils import json
@@ -92,7 +94,7 @@ from . import assert_status_code
 from .factories import Factories
 from .fixtures import Fixtures
 from .helpers import AuthProvider, Feature, TaskRunner, override_options, parse_queries
-from .skips import requires_snuba
+from .skips import requires_snuba, requires_snuba_metrics
 
 DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
 
@@ -932,6 +934,69 @@ class SnubaTestCase(BaseTestCase):
         )
 
 
@requires_snuba_metrics
class SessionMetricsTestCase(SnubaTestCase):
    """Store metrics instead of sessions"""

    # NOTE: This endpoint does not exist yet, but we need something alike
    # because /tests/<dataset>/insert always writes to the default entity
    # (in the case of metrics, that's "metrics_sets")
    snuba_endpoint = "/tests/entities/metrics_counters/insert"

    def store_session(self, session):
        """Mimic relay's behavior of always emitting a metric for a started
        session, and emitting an additional one if the session is fatal
        https://github.com/getsentry/relay/blob/e3c064e213281c36bde5d2b6f3032c6d36e22520/relay-server/src/actors/envelopes.rs#L357
        """

        def metric_id(name):
            # Metric names are resolved with the METRIC use case.
            res = indexer.resolve(session["org_id"], UseCase.METRIC, name)
            assert res is not None, name
            return res

        def tag_key(name):
            res = indexer.resolve(session["org_id"], UseCase.TAG_KEY, name)
            assert res is not None, name
            return res

        def tag_value(name):
            # Fix: tag *values* must be resolved with UseCase.TAG_VALUE;
            # the original resolved them with TAG_KEY, which is inconsistent
            # with releasehealth/metrics.py's tag_value helper.
            res = indexer.resolve(session["org_id"], UseCase.TAG_VALUE, name)
            assert res is not None, name
            return res

        msg = {
            "org_id": session["org_id"],
            "project_id": session["project_id"],
            "metric_id": metric_id("session"),
            "timestamp": session["started"],
            "tags": {tag_key("session.status"): tag_value("init")},
            "type": "c",  # counter metric
            "value": 1.0,
            "retention_days": 90,
        }

        self._send(msg)

        status = session["status"]

        if status in ("abnormal", "crashed"):
            # Count as fatal: re-send the same counter with the session's
            # fatal status instead of "init".
            msg["tags"][tag_key("session.status")] = tag_value(status)
            self._send(msg)

        # TODO: emit metric "session.error" of type "set"

    @classmethod
    def _send(cls, msg):
        # Write directly to snuba's test insert endpoint; anything other than
        # HTTP 200 fails the test.
        assert (
            requests.post(
                settings.SENTRY_SNUBA + cls.snuba_endpoint,
                data=json.dumps([msg]),
            ).status_code
            == 200
        )
+
+
 class BaseIncidentsTest(SnubaTestCase):
     def create_event(self, timestamp, fingerprint=None, user=None):
         event_id = uuid4().hex

Some files were not shown because too many files changed in this diff