Browse Source

fix(discover): Fix flakey events meps backend tests (#41513)

Updates some of the flakier tests to wait for ClickHouse to be
populated before running test queries. The suspicion is that, because
ClickHouse writes aren't synchronous, there's a small chance that test
queries run before metrics are fully written, which can cause flakes.
edwardgou-sentry 2 years ago
parent
commit
8223a318d1

+ 39 - 1
src/sentry/testutils/cases.py

@@ -120,6 +120,7 @@ from sentry.search.events.constants import (
 )
 from sentry.sentry_metrics import indexer
 from sentry.sentry_metrics.configuration import UseCaseKey
+from sentry.snuba.metrics.datasource import get_series
 from sentry.tagstore.snuba import SnubaTagStorage
 from sentry.testutils.factories import get_fixture_path
 from sentry.testutils.helpers.datetime import before_now, iso_format
@@ -1440,7 +1441,7 @@ class BaseMetricsLayerTestCase(BaseMetricsTestCase):
         )
 
 
-class MetricsEnhancedPerformanceTestCase(BaseMetricsTestCase, TestCase):
+class MetricsEnhancedPerformanceTestCase(BaseMetricsLayerTestCase, TestCase):
     TYPE_MAP = {
         "metrics_distributions": "distribution",
         "metrics_sets": "set",
@@ -1521,6 +1522,43 @@ class MetricsEnhancedPerformanceTestCase(BaseMetricsTestCase, TestCase):
                 use_case_id=UseCaseKey.PERFORMANCE,
             )
 
+    def wait_for_metric_count(
+        self,
+        project,
+        total,
+        metric="transaction.duration",
+        mri=TransactionMRI.DURATION.value,
+        attempts=2,
+    ):
+        """Poll until ``count(metric)`` for ``project`` reaches ``total``.
+
+        Raises AssertionError if that does not happen within ``attempts`` polls.
+        """
+        metrics_query = self.build_metrics_query(
+            before_now="1d",
+            granularity="1d",
+            select=[MetricField(op="count", metric_mri=mri)],
+            include_series=False,
+        )
+        for attempt in range(attempts):
+            if attempt:
+                # Sleep only between polls, never after the last one.
+                time.sleep(0.05)
+            data = get_series(
+                [project],
+                metrics_query=metrics_query,
+                use_case_id=UseCaseKey.PERFORMANCE,
+            )
+            # Empty result: indexing [0] would raise IndexError, not retry.
+            groups = data["groups"]
+            if groups and groups[0]["totals"][f"count({metric})"] >= total:
+                return
+
+        # Raise explicitly so the check survives ``python -O``.
+        raise AssertionError(
+            f"Could not ensure that {total} metric(s) were persisted within {attempts} attempt(s)."
+        )
+
 
 class BaseIncidentsTest(SnubaTestCase):
     def create_event(self, timestamp, fingerprint=None, user=None):

+ 17 - 0
tests/snuba/api/endpoints/test_organization_events_mep.py

@@ -13,6 +13,8 @@ from sentry.models.transaction_threshold import (
     TransactionMetric,
 )
 from sentry.search.events import constants
+from sentry.snuba.metrics.naming_layer.mri import TransactionMRI
+from sentry.snuba.metrics.naming_layer.public import TransactionMetricKey
 from sentry.testutils import MetricsEnhancedPerformanceTestCase
 from sentry.testutils.helpers.datetime import before_now, iso_format
 from sentry.testutils.silo import region_silo_test
@@ -1307,6 +1309,12 @@ class OrganizationEventsMetricsEnhancedPerformanceEndpointTest(MetricsEnhancedPe
             "per_page": 50,
         }
 
+        self.wait_for_metric_count(
+            self.project,
+            1,
+            metric="measurements.something_custom",
+            mri="d:transactions/measurements.something_custom@millisecond",
+        )
         response = self.do_request(query)
         assert response.status_code == 200, response.content
         assert len(response.data["data"]) == 1
@@ -1400,6 +1408,12 @@ class OrganizationEventsMetricsEnhancedPerformanceEndpointTest(MetricsEnhancedPe
             "per_page": 50,
         }
 
+        self.wait_for_metric_count(
+            self.project,
+            1,
+            metric=TransactionMetricKey.MEASUREMENTS_CLS.value,
+            mri=TransactionMRI.MEASUREMENTS_CLS.value,
+        )
         response = self.do_request(query)
         assert response.status_code == 200, response.content
         assert len(response.data["data"]) == 1
@@ -1603,6 +1617,7 @@ class OrganizationEventsMetricsEnhancedPerformanceEndpointTest(MetricsEnhancedPe
             "per_page": 50,
         }
 
+        self.wait_for_metric_count(self.project, 2)
         response = self.do_request(query)
         assert response.status_code == 200, response.content
         assert len(response.data["data"]) == 1
@@ -1640,6 +1655,7 @@ class OrganizationEventsMetricsEnhancedPerformanceEndpointTest(MetricsEnhancedPe
             "per_page": 50,
         }
 
+        self.wait_for_metric_count(self.project, 2)
         response = self.do_request(query)
         assert response.status_code == 200, response.content
         assert len(response.data["data"]) == 2
@@ -1913,6 +1929,7 @@ class OrganizationEventsMetricsEnhancedPerformanceEndpointTest(MetricsEnhancedPe
             "per_page": 50,
         }
 
+        self.wait_for_metric_count(self.project, 3)
         response = self.do_request(query)
         assert response.status_code == 200, response.content
         assert len(response.data["data"]) == 1