Browse Source

fix(release_health): Add new featureflag to return metric results to user (#32904)

Markus Unterwaditzer 2 years ago
parent
commit
7967aea89c
4 changed files with 83 additions and 40 deletions
  1. 2 0
      mypy.ini
  2. 2 0
      src/sentry/conf/server.py
  3. 1 0
      src/sentry/features/__init__.py
  4. 78 40
      src/sentry/release_health/duplex.py

+ 2 - 0
mypy.ini

@@ -139,5 +139,7 @@ ignore_missing_imports = True
 ignore_missing_imports = True
 [mypy-zstandard]
 ignore_missing_imports = True
+[mypy-msgpack]
+ignore_missing_imports = True
 [mypy-drf_spectacular.utils]
 follow_imports = normal

+ 2 - 0
src/sentry/conf/server.py

@@ -1010,6 +1010,8 @@ SENTRY_FEATURES = {
     "organizations:release-health-check-metrics": False,
     # True if differences between the metrics and sessions backend should be reported
     "organizations:release-health-check-metrics-report": False,
+    # True if the metrics data should be returned as API response (if possible with current data)
+    "organizations:release-health-return-metrics": False,
     # Enable threshold period in metric alert rule builder
     "organizations:metric-alert-threshold-period": False,
     # Enable integration functionality to create and link groups to issues on

+ 1 - 0
src/sentry/features/__init__.py

@@ -114,6 +114,7 @@ default_manager.add("organizations:related-events", OrganizationFeature)
 default_manager.add("organizations:release-comparison-performance", OrganizationFeature, True)
 default_manager.add("organizations:release-health-check-metrics", OrganizationFeature, True)
 default_manager.add("organizations:release-health-check-metrics-report", OrganizationFeature, True)
+default_manager.add("organizations:release-health-return-metrics", OrganizationFeature, True)
 default_manager.add("organizations:reprocessing-v2", OrganizationFeature)
 default_manager.add("organizations:required-email-verification", OrganizationFeature, True)  # NOQA
 default_manager.add("organizations:rule-page", OrganizationFeature)

+ 78 - 40
src/sentry/release_health/duplex.py

@@ -640,6 +640,7 @@ def run_comparison(
     schema: Optional[Schema],
     function_args: Tuple[Any],
     sessions_result: Any,
+    metrics_result: Any,
     sessions_time: datetime,
     sentry_tags: Optional[Mapping[str, str]] = None,
     **kwargs,
@@ -668,9 +669,10 @@ def run_comparison(
         set_extra("delay", delay)
         timing("releasehealth.metrics.delay", delay)
 
-        # We read from the metrics source even if there is no need to compare.
-        with timer("releasehealth.metrics.duration", tags=tags, sample_rate=1.0):
-            metrics_val = metrics_fn(*function_args)
+        if metrics_result is None:
+            # We read from the metrics source even if there is no need to compare.
+            with timer("releasehealth.metrics.duration", tags=tags, sample_rate=1.0):
+                metrics_result = metrics_fn(*function_args)
 
         incr(
             "releasehealth.metrics.check_should_compare",
@@ -683,10 +685,10 @@ def run_comparison(
 
         copy = deepcopy(sessions_result)
 
-        set_context("release-health-duplex-metrics", {"metrics": metrics_val})
+        set_context("release-health-duplex-metrics", {"metrics": metrics_result})
 
         with timer("releasehealth.results-diff.duration", tags=tags, sample_rate=1.0):
-            errors = compare_results(copy, metrics_val, rollup, None, schema)
+            errors = compare_results(copy, metrics_result, rollup, None, schema)
         set_context("release-health-duplex-errors", {"errors": [str(error) for error in errors]})
 
         should_report = features.has(
@@ -765,45 +767,86 @@ class DuplexReleaseHealthBackend(ReleaseHealthBackend):
     def _dispatch_call_inner(
         self,
         fn_name: str,
-        should_compare: Union[bool, Callable[[Any], bool]],
+        should_compare: Union[bool, Callable[[], bool]],
         rollup: Optional[int],
         organization: Optional[Organization],
         schema: Optional[Schema],
         *args: Any,
         sentry_tags: Optional[Mapping[str, str]] = None,
     ) -> ReleaseHealthResult:
+        # Note: having both featureflags enabled at once is deadly for endpoint
+        # performance, but does work.
+        should_return_metrics = organization is not None and features.has(
+            "organizations:release-health-return-metrics", organization
+        )
+        should_check_metrics = organization is not None and features.has(
+            "organizations:release-health-check-metrics", organization
+        )
+        if not isinstance(should_compare, bool):
+            try:
+                should_compare = should_compare()
+            except Exception:
+                capture_exception()
+                should_compare = False
+
+            if not should_compare:
+                # if should_compare=False, then we already know the metrics result is bad and shouldn't return it.
+                should_return_metrics = False
+
         sessions_fn = getattr(self.sessions, fn_name)
+        metrics_fn = getattr(self.metrics, fn_name)
 
         now = datetime.now(pytz.utc)
         tags = {"method": fn_name, "rollup": str(rollup)}
-        with timer("releasehealth.sessions.duration", tags=tags, sample_rate=1.0):
-            ret_val = sessions_fn(*args)
+        incr(
+            "releasehealth.metrics.should_return",
+            tags={"should_return": str(should_return_metrics), **tags},
+        )
 
-        if organization is not None and features.has(
-            "organizations:release-health-check-metrics", organization
-        ):
-            if not isinstance(should_compare, bool):
-                # should compare depends on the session result
-                # evaluate it now
-                should_compare = should_compare(ret_val)
+        sessions_result = None  # get rid of unbound warnings -- this line shouldn't be necessary
+        metrics_result = None
 
+        if should_return_metrics:
+            try:
+                with timer("releasehealth.metrics.duration", tags=tags, sample_rate=1.0):
+                    metrics_result = metrics_fn(*args)
+            except Exception:
+                capture_exception()
+
+        if should_check_metrics or metrics_result is None:
+            with timer("releasehealth.sessions.duration", tags=tags, sample_rate=1.0):
+                sessions_result = sessions_fn(*args)
+
+        if should_check_metrics:
             try:
                 run_comparison.delay(
                     fn_name,
-                    getattr(self.metrics, fn_name),
+                    metrics_fn,
                     should_compare,
                     rollup,
                     organization,
                     schema,
                     function_args=args,
-                    sessions_result=ret_val,
+                    sessions_result=sessions_result,
+                    metrics_result=metrics_result,
                     sessions_time=now,
                     sentry_tags=sentry_tags,
                 )
             except Exception:
                 capture_exception()
 
-        return ret_val
+        if metrics_result is not None:
+            incr(
+                "releasehealth.metrics.should_return",
+                tags={"did_return": "True", **tags},
+            )
+            return metrics_result
+        else:
+            incr(
+                "releasehealth.metrics.should_return",
+                tags={"did_return": "False", **tags},
+            )
+            return sessions_result
 
     if TYPE_CHECKING:
         # Mypy is not smart enough to figure out _dispatch_call is a wrapper
@@ -833,7 +876,7 @@ class DuplexReleaseHealthBackend(ReleaseHealthBackend):
                 "previousCrashFreeRate": ComparatorType.Ratio,
             }
         }
-        should_compare = lambda _: _coerce_utc(previous_start) > self.metrics_start
+        should_compare = lambda: _coerce_utc(previous_start) > self.metrics_start
 
         if org_id is not None:
             organization = self._org_from_id(org_id)
@@ -874,7 +917,7 @@ class DuplexReleaseHealthBackend(ReleaseHealthBackend):
         if now is None:
             now = datetime.now(pytz.utc)
 
-        should_compare = lambda _: now - timedelta(hours=24) > self.metrics_start
+        should_compare = lambda: now - timedelta(hours=24) > self.metrics_start
 
         if org_id is not None:
             organization = self._org_from_id(org_id)
@@ -937,7 +980,7 @@ class DuplexReleaseHealthBackend(ReleaseHealthBackend):
             "query": ComparatorType.Exact,
         }
 
-        should_compare = lambda _: _coerce_utc(query.start) > self.metrics_start
+        should_compare = lambda: _coerce_utc(query.start) > self.metrics_start
 
         organization = self._org_from_id(org_id)
         return self._dispatch_call(  # type: ignore
@@ -965,12 +1008,8 @@ class DuplexReleaseHealthBackend(ReleaseHealthBackend):
             "sessions_upper_bound": ComparatorType.DateTime,
         }
 
-        def should_compare(val: ReleaseSessionsTimeBounds) -> bool:
-            lower_bound = val.get("sessions_lower_bound")
-            if lower_bound is not None:
-                lower_bound_d = parser.parse(lower_bound)
-                return lower_bound_d > self.metrics_start
-            return True
+        now = datetime.now(pytz.utc)
+        should_compare = lambda: now - timedelta(days=90) > self.metrics_start
 
         organization = self._org_from_id(org_id)
 
@@ -996,7 +1035,7 @@ class DuplexReleaseHealthBackend(ReleaseHealthBackend):
 
         rollup = self.DEFAULT_ROLLUP  # not used
         schema = {ComparatorType.Exact}
-        should_compare = lambda _: now - timedelta(days=90) > self.metrics_start
+        should_compare = lambda: now - timedelta(days=90) > self.metrics_start
         organization = self._org_from_projects(projects_list)
         return self._dispatch_call(  # type: ignore
             "check_has_health_data",
@@ -1018,7 +1057,7 @@ class DuplexReleaseHealthBackend(ReleaseHealthBackend):
     ) -> Set[ReleaseName]:
         rollup = self.DEFAULT_ROLLUP  # not used
         schema = {ComparatorType.Exact}
-        should_compare = lambda _: _coerce_utc(start) > self.metrics_start
+        should_compare = lambda: _coerce_utc(start) > self.metrics_start
 
         organization = self._org_from_id(organization_id)
 
@@ -1057,7 +1096,7 @@ class DuplexReleaseHealthBackend(ReleaseHealthBackend):
         if now is None:
             now = datetime.now(pytz.utc)
 
-        should_compare = lambda _: now - timedelta(days=1) > self.metrics_start
+        should_compare = lambda: now - timedelta(days=1) > self.metrics_start
         organization = self._org_from_projects(project_releases)
         return self._dispatch_call(  # type: ignore
             "get_release_health_data_overview",
@@ -1094,7 +1133,7 @@ class DuplexReleaseHealthBackend(ReleaseHealthBackend):
                 "crash_free_sessions": ComparatorType.Ratio,
             }
         ]
-        should_compare = lambda _: _coerce_utc(start) > self.metrics_start
+        should_compare = lambda: _coerce_utc(start) > self.metrics_start
         organization = self._org_from_projects([project_id])
         return self._dispatch_call(  # type: ignore
             "get_crash_free_breakdown",
@@ -1118,9 +1157,7 @@ class DuplexReleaseHealthBackend(ReleaseHealthBackend):
 
         schema = ComparatorType.Ignore
 
-        should_compare = (
-            lambda _: datetime.now(timezone.utc) - timedelta(days=3) > self.metrics_start
-        )
+        should_compare = lambda: datetime.now(timezone.utc) - timedelta(days=3) > self.metrics_start
 
         if now is None:
             now = datetime.now(pytz.utc)
@@ -1146,7 +1183,7 @@ class DuplexReleaseHealthBackend(ReleaseHealthBackend):
 
         rollup = self.DEFAULT_ROLLUP  # TODO check if this is correct ?
         schema = {"*": ComparatorType.DateTime}
-        should_compare = lambda _: now - timedelta(days=90) > self.metrics_start
+        should_compare = lambda: now - timedelta(days=90) > self.metrics_start
         organization = self._org_from_projects(project_releases)
         return self._dispatch_call(  # type: ignore
             "get_oldest_health_data_for_releases",
@@ -1175,7 +1212,7 @@ class DuplexReleaseHealthBackend(ReleaseHealthBackend):
             stats_period = "24h"
 
         rollup, stats_start, _ = get_rollup_starts_and_buckets(stats_period)
-        should_compare = lambda _: _coerce_utc(stats_start) > self.metrics_start
+        should_compare = lambda: _coerce_utc(stats_start) > self.metrics_start
 
         organization = self._org_from_id(organization_id)
 
@@ -1217,7 +1254,7 @@ class DuplexReleaseHealthBackend(ReleaseHealthBackend):
                 "*": ComparatorType.Counter,
             },
         )
-        should_compare = lambda _: _coerce_utc(start) > self.metrics_start
+        should_compare = lambda: _coerce_utc(start) > self.metrics_start
         organization = self._org_from_projects([project_id])
         return self._dispatch_call(  # type: ignore
             "get_project_release_stats",
@@ -1247,7 +1284,8 @@ class DuplexReleaseHealthBackend(ReleaseHealthBackend):
         # We verified the correctness of the metrics implementation manually.
         # The results still differ because the sessions impl gets its results
         # from hourly aggregations.
-        should_compare = False
+        schema = ComparatorType.Ignore
+        should_compare = lambda: _coerce_utc(start) > self.metrics_start
 
         organization = self._org_from_projects([project_id])
         return self._dispatch_call(  # type: ignore
@@ -1272,7 +1310,7 @@ class DuplexReleaseHealthBackend(ReleaseHealthBackend):
         rollup: Optional[int] = None,  # rollup in seconds
     ) -> Sequence[ProjectWithCount]:
         schema = [(ComparatorType.Exact, ComparatorType.Counter)]
-        should_compare = lambda _: _coerce_utc(start) > self.metrics_start
+        should_compare = lambda: _coerce_utc(start) > self.metrics_start
         organization = self._org_from_projects(project_ids)
         return self._dispatch_call(  # type: ignore
             "get_num_sessions_per_project",
@@ -1310,7 +1348,7 @@ class DuplexReleaseHealthBackend(ReleaseHealthBackend):
 
         rollup, stats_start, _ = get_rollup_starts_and_buckets(stats_period, now=now)
         assert stats_start is not None  # because stats_period is not None
-        should_compare = lambda _: stats_start > self.metrics_start
+        should_compare = lambda: stats_start > self.metrics_start
         organization = self._org_from_projects(project_ids)
 
         return self._dispatch_call(  # type: ignore