# organization_activity.py
from functools import reduce

from rest_framework.request import Request
from rest_framework.response import Response

from sentry.api.base import EnvironmentMixin
from sentry.api.bases import OrganizationMemberEndpoint
from sentry.api.paginator import DateTimePaginator
from sentry.api.serializers import OrganizationActivitySerializer, serialize
from sentry.models import Activity, OrganizationMemberTeam, Project
from sentry.types.activity import ActivityType
  10. class OrganizationActivityEndpoint(OrganizationMemberEndpoint, EnvironmentMixin):
  11. def get(self, request: Request, organization, member) -> Response:
  12. # There is an activity record created for both sides of the unmerge
  13. # operation, so we only need to include one of them here to avoid
  14. # showing the same entry twice.
  15. base_qs = Activity.objects.exclude(type=ActivityType.UNMERGE_SOURCE.value).values_list(
  16. "id", flat=True
  17. )
  18. # To make this query efficient, we have to hammer it into a weird format. This table is
  19. # extremely large and if we are making a query across many projects, in a lot of cases
  20. # Postgres decides that the best query plan is to iterate backwards on the datetime index.
  21. # This means it does something close to a table scan to get the results it wants - this can
  22. # be the case even for orgs with less than a page of activity rows, and often results in
  23. # queries that take > 30s, which get killed by stomper.
  24. # To convince Postgres to use the index on `(project_id, datetime)`, it basically needs to
  25. # see queries that filter on a single project id and then order by datetime. So we replicate
  26. # the query for every project and UNION ALL them together to get the candidate set of rows.
  27. # Then we sort these and return the final result. Convoluted, but it improves the query a
  28. # lot.
  29. # To make this work well with pagination, we have to also apply the pagination queries to
  30. # the subqueries.
  31. cursor = self.get_cursor_from_request(request)
  32. paginator = DateTimePaginator(base_qs, order_by="-datetime")
  33. if cursor is not None and cursor.value:
  34. cursor_value = paginator.value_from_cursor(cursor)
  35. else:
  36. cursor_value = 0
  37. base_qs = paginator.build_queryset(cursor_value, False)
  38. project_ids = list(
  39. Project.objects.filter(
  40. organization=organization,
  41. teams__in=OrganizationMemberTeam.objects.filter(organizationmember=member).values(
  42. "team"
  43. ),
  44. ).values_list("id", flat=True)
  45. )
  46. union_qs = Activity.objects.none()
  47. if project_ids:
  48. union_qs = reduce(
  49. lambda qs1, qs2: qs1.union(qs2, all=True),
  50. [
  51. base_qs.filter(project_id=project)[: paginator.max_limit]
  52. for project in project_ids
  53. ],
  54. )
  55. # We do `select_related` here to make the unions less heavy. This way we only join these
  56. # table for the rows we actually want.
  57. queryset = Activity.objects.filter(id__in=union_qs[: paginator.max_limit]).select_related(
  58. "project", "group", "user"
  59. )
  60. return self.paginate(
  61. request=request,
  62. queryset=queryset,
  63. paginator_cls=DateTimePaginator,
  64. order_by="-datetime",
  65. on_results=lambda x: serialize(
  66. x,
  67. request.user,
  68. OrganizationActivitySerializer(
  69. environment_func=self._get_environment_func(request, organization.id)
  70. ),
  71. ),
  72. )