Browse Source

ref(db): Explicitly use replica for platform stats task (#28897)

* ref(db): Explicitly use replica for platform stats task

* ref(db): Add `using_replica` to the base model manager

* Move the `using_replica` to a BaseQuerySet

* Update sentry BaseManager to reflect methods from sentry BaseQuerySet
Michal Kuffa 3 years ago
parent
commit
e1b045c1cb

+ 2 - 2
src/sentry/db/models/manager/base.py

@@ -7,7 +7,7 @@ from typing import Any, Generator, Generic, Mapping, MutableMapping, Optional, S
 from django.conf import settings
 from django.db import router
 from django.db.models import Model
-from django.db.models.manager import Manager
+from django.db.models.manager import BaseManager as DjangoBaseManager
 from django.db.models.signals import class_prepared, post_delete, post_init, post_save
 
 from sentry.db.models.manager import M, make_key
@@ -24,7 +24,7 @@ _local_cache_generation = 0
 _local_cache_enabled = False
 
 
-class BaseManager(Manager, Generic[M]):  # type: ignore
+class BaseManager(DjangoBaseManager.from_queryset(BaseQuerySet), Generic[M]):  # type: ignore
     lookup_handlers = {"iexact": lambda x: x.upper()}
     use_for_related_fields = True
 

+ 8 - 0
src/sentry/db/models/manager/base_query_set.py

@@ -1,5 +1,6 @@
 import abc
 
+from django.db import router
 from django.db.models import QuerySet
 
 from sentry.utils.types import Any
@@ -11,6 +12,13 @@ class BaseQuerySet(QuerySet, abc.ABC):  # type: ignore
     # def values(self, *args, **kwargs):
     #     raise NotImplementedError('Use ``values_list`` instead [performance].')
 
+    def using_replica(self) -> "BaseQuerySet":
+        """
+        Use the read replica for this query. The database router is expected to
+        use the `replica=True` hint to make its routing decision.
+        """
+        return self.using(router.db_for_read(self.model, replica=True))
+
     def defer(self, *args: Any, **kwargs: Any) -> "BaseQuerySet":
         raise NotImplementedError("Use ``values_list`` instead [performance].")
 

+ 3 - 2
src/sentry/tasks/collect_project_platforms.py

@@ -13,11 +13,12 @@ def collect_project_platforms(**kwargs):
     now = timezone.now()
 
     min_project_id = 0
-    max_project_id = Project.objects.aggregate(x=Max("id"))["x"] or 0
+    max_project_id = Project.objects.using_replica().aggregate(x=Max("id"))["x"] or 0
     step = 1000
     while min_project_id <= max_project_id:
         queryset = (
-            Group.objects.filter(
+            Group.objects.using_replica()
+            .filter(
                 last_seen__gte=now - timedelta(days=1),
                 project__gte=min_project_id,
                 project__lt=min_project_id + step,