Browse Source

feat(github-growth): link all repos task (#52595)

Cathy Teng 1 year ago
parent
commit
a2f6c9d87a

+ 3 - 0
src/sentry/integrations/base.py

@@ -414,6 +414,9 @@ class IntegrationInstallation:
             self.logger.exception(str(exc))
             raise IntegrationError(self.message_from_error(exc)).with_traceback(sys.exc_info()[2])
 
+    def is_rate_limited_error(self, exc: Exception) -> bool:
+        raise NotImplementedError
+
     @property
     def metadata(self) -> IntegrationMetadata:
         return self.model.metadata

+ 18 - 0
src/sentry/integrations/github/integration.py

@@ -30,6 +30,9 @@ from sentry.services.hybrid_cloud.repository import RpcRepository, repository_se
 from sentry.shared_integrations.constants import ERR_INTERNAL, ERR_UNAUTHORIZED
 from sentry.shared_integrations.exceptions import ApiError, IntegrationError
 from sentry.tasks.integrations import migrate_repo
+from sentry.tasks.integrations.github.pr_comment import RATE_LIMITED_MESSAGE
+from sentry.tasks.integrations.link_all_repos import link_all_repos
+from sentry.utils import metrics
 from sentry.web.helpers import render_to_response
 
 from .client import GitHubAppsClient, GitHubClientMixin
@@ -112,6 +115,13 @@ class GitHubIntegration(IntegrationInstallation, GitHubIssueBasic, RepositoryMix
             raise IntegrationError("Organization Integration does not exist")
         return GitHubAppsClient(integration=self.model, org_integration_id=self.org_integration.id)
 
+    def is_rate_limited_error(self, exc: Exception) -> bool:
+        if exc.json and RATE_LIMITED_MESSAGE in exc.json.get("message", ""):
+            metrics.incr("github.link_all_repos.rate_limited_error")
+            return True
+
+        return False
+
     def get_trees_for_org(self, cache_seconds: int = 3600 * 24) -> Dict[str, RepoTree]:
         trees: Dict[str, RepoTree] = {}
         domain_name = self.model.metadata["domain_name"]
@@ -305,6 +315,14 @@ class GitHubIntegrationProvider(IntegrationProvider):
                 }
             )
 
+        link_all_repos.apply_async(
+            kwargs={
+                "integration_key": self.key,
+                "integration_id": integration.id,
+                "organization_id": organization.id,
+            }
+        )
+
     def get_pipeline_views(self) -> Sequence[PipelineView]:
         return [GitHubInstallationRedirect()]
 

+ 13 - 0
src/sentry/plugins/providers/integration_repository.py

@@ -25,6 +25,19 @@ class RepoExistsError(APIException):
     detail = {"errors": {"__all__": "A repository with that name already exists"}}
 
 
+def get_integration_repository_provider(integration):
+    from sentry.plugins.base import bindings  # circular import
+
+    binding_key = "integration-repository.provider"
+    provider_key = (
+        integration.provider
+        if integration.provider.startswith("integrations:")
+        else "integrations:" + integration.provider
+    )
+    provider_cls = bindings.get(binding_key).get(provider_key)
+    return provider_cls(id=provider_key)
+
+
 class IntegrationRepositoryProvider:
     """
     Repository Provider for Integrations in the Sentry Repository.

+ 1 - 0
src/sentry/tasks/integrations/__init__.py

@@ -37,6 +37,7 @@ settings.CELERY_IMPORTS += (
     "sentry.tasks.integrations.create_comment",
     "sentry.tasks.integrations.github.pr_comment",
     "sentry.tasks.integrations.kick_off_status_syncs",
+    "sentry.tasks.integrations.link_all_repos",
     "sentry.tasks.integrations.migrate_issues",
     "sentry.tasks.integrations.migrate_repo",
     "sentry.tasks.integrations.sync_assignee_outbound_impl",

+ 78 - 0
src/sentry/tasks/integrations/link_all_repos.py

@@ -0,0 +1,78 @@
+import logging
+
+import sentry_sdk
+
+from sentry.models.organization import Organization
+from sentry.plugins.providers.integration_repository import get_integration_repository_provider
+from sentry.services.hybrid_cloud.integration import integration_service
+from sentry.shared_integrations.exceptions.base import ApiError
+from sentry.tasks.base import instrumented_task
+from sentry.utils import metrics
+
+logger = logging.getLogger(__name__)
+
+
+def get_repo_config(repo, integration_id):
+    return {
+        "external_id": repo["id"],
+        "integration_id": integration_id,
+        "identifier": repo["full_name"],
+    }
+
+
+@instrumented_task(name="sentry.integrations.github.link_all_repos", queue="integrations")
+def link_all_repos(
+    integration_key: str,
+    integration_id: int,
+    organization_id: int,
+):
+    integration = integration_service.get_integration(integration_id=integration_id)
+    if not integration:
+        logger.error(
+            f"{integration_key}.link_all_repos.integration_missing",
+            extra={"organization_id": organization_id},
+        )
+        metrics.incr("github.link_all_repos.error", tags={"type": "missing_integration"})
+        return
+
+    try:
+        organization = Organization.objects.get(id=organization_id)
+    except Organization.DoesNotExist:
+        logger.error(
+            f"{integration_key}.link_all_repos.organization_missing",
+            extra={"organization_id": organization_id},
+        )
+        metrics.incr(
+            f"{integration_key}.link_all_repos.error",
+            tags={"type": "missing_organization"},
+        )
+        return
+
+    installation = integration_service.get_installation(
+        integration=integration, organization_id=organization_id
+    )
+
+    client = installation.get_client()
+
+    try:
+        repositories = client.get_repositories(fetch_max_pages=True)
+    except ApiError as e:
+        if installation.is_rate_limited_error(e):
+            return
+
+        metrics.incr(f"{integration_key}.link_all_repos.api_error")
+        raise e
+
+    integration_repo_provider = get_integration_repository_provider(integration)
+
+    for repo in repositories:
+        try:
+            config = get_repo_config(repo, integration_id)
+            integration_repo_provider.create_repository(
+                repo_config=config, organization=organization
+            )
+        except KeyError:
+            continue
+        except Exception as e:
+            sentry_sdk.capture_exception(e)
+            continue

+ 2 - 3
tests/sentry/integrations/github/test_integration.py

@@ -251,10 +251,9 @@ class GitHubIntegrationTest(IntegrationTestCase):
         assert integration.external_id == self.installation_id
         assert integration.name == "Test Organization"
         assert integration.metadata == {
-            "access_token": None,
+            "access_token": self.access_token,
             # The metadata doesn't get saved with the timezone "Z" character
-            # for some reason, so just compare everything but that.
-            "expires_at": None,
+            "expires_at": self.expires_at[:-1],
             "icon": "http://example.com/avatar.png",
             "domain_name": "github.com/Test-Organization",
             "account_type": "Organization",

+ 200 - 0
tests/sentry/tasks/integrations/test_link_all_repos.py

@@ -0,0 +1,200 @@
+from datetime import timedelta
+from unittest.mock import patch
+
+import pytest
+import responses
+from django.db import IntegrityError
+from django.utils import timezone
+
+from sentry.integrations.github.integration import GitHubIntegrationProvider
+from sentry.models.repository import Repository
+from sentry.shared_integrations.exceptions.base import ApiError
+from sentry.snuba.sessions_v2 import isoformat_z
+from sentry.tasks.integrations.link_all_repos import link_all_repos
+from sentry.testutils.cases import IntegrationTestCase
+from sentry.testutils.silo import region_silo_test
+
+
+@region_silo_test(stable=True)
+@patch("sentry.integrations.github.client.get_jwt", return_value=b"jwt_token_1")
+class LinkAllReposTestCase(IntegrationTestCase):
+    provider = GitHubIntegrationProvider
+    base_url = "https://api.github.com"
+    key = "github"
+
+    def setUp(self):
+        super().setUp()
+        self.installation_id = "github:1"
+        self.user_id = "user_1"
+        self.app_id = "app_1"
+        self.access_token = "xxxxx-xxxxxxxxx-xxxxxxxxxx-xxxxxxxxxxxx"
+        self.expires_at = isoformat_z(timezone.now() + timedelta(days=365))
+
+    def _add_responses(self):
+        responses.add(
+            responses.POST,
+            self.base_url + f"/app/installations/{self.installation_id}/access_tokens",
+            json={"token": self.access_token, "expires_at": self.expires_at},
+        )
+        responses.add(
+            responses.GET,
+            self.base_url + "/installation/repositories?per_page=100",
+            status=200,
+            json={
+                "total_count": 2,
+                "repositories": [
+                    {
+                        "id": 1,
+                        "full_name": "getsentry/sentry",
+                    },
+                    {
+                        "id": 2,
+                        "full_name": "getsentry/snuba",
+                    },
+                ],
+            },
+        )
+
+    @responses.activate
+    def test_link_all_repos(self, _):
+        self._add_responses()
+
+        link_all_repos(
+            integration_key=self.key,
+            integration_id=self.integration.id,
+            organization_id=self.organization.id,
+        )
+
+        repos = Repository.objects.all()
+        assert len(repos) == 2
+
+        for repo in repos:
+            assert repo.organization_id == self.organization.id
+            assert repo.provider == "integrations:github"
+
+        assert repos[0].name == "getsentry/sentry"
+        assert repos[1].name == "getsentry/snuba"
+
+    @responses.activate
+    def test_link_all_repos_api_response_keyerror(self, _):
+        responses.add(
+            responses.POST,
+            self.base_url + f"/app/installations/{self.installation_id}/access_tokens",
+            json={"token": self.access_token, "expires_at": self.expires_at},
+        )
+        responses.add(
+            responses.GET,
+            self.base_url + "/installation/repositories?per_page=100",
+            status=200,
+            json={
+                "total_count": 2,
+                "repositories": [
+                    {
+                        "full_name": "getsentry/sentry",
+                    },
+                    {
+                        "id": 2,
+                        "full_name": "getsentry/snuba",
+                    },
+                ],
+            },
+        )
+
+        link_all_repos(
+            integration_key=self.key,
+            integration_id=self.integration.id,
+            organization_id=self.organization.id,
+        )
+
+        repos = Repository.objects.all()
+        assert len(repos) == 1
+
+        assert repos[0].organization_id == self.organization.id
+        assert repos[0].provider == "integrations:github"
+
+        assert repos[0].name == "getsentry/snuba"
+
+    @patch("sentry.tasks.integrations.link_all_repos.metrics")
+    def test_link_all_repos_missing_integration(self, mock_metrics, _):
+        link_all_repos(
+            integration_key=self.key,
+            integration_id=0,
+            organization_id=self.organization.id,
+        )
+        mock_metrics.incr.assert_called_with(
+            "github.link_all_repos.error", tags={"type": "missing_integration"}
+        )
+
+    @patch("sentry.tasks.integrations.link_all_repos.metrics")
+    def test_link_all_repos_missing_organization(self, mock_metrics, _):
+        link_all_repos(
+            integration_key=self.key,
+            integration_id=self.integration.id,
+            organization_id=0,
+        )
+        mock_metrics.incr.assert_called_with(
+            "github.link_all_repos.error", tags={"type": "missing_organization"}
+        )
+
+    @patch("sentry.tasks.integrations.link_all_repos.metrics")
+    @responses.activate
+    def test_link_all_repos_api_error(self, mock_metrics, _):
+        responses.add(
+            responses.POST,
+            self.base_url + f"/app/installations/{self.installation_id}/access_tokens",
+            json={"token": self.access_token, "expires_at": self.expires_at},
+        )
+        responses.add(
+            responses.GET,
+            self.base_url + "/installation/repositories?per_page=100",
+            status=400,
+        )
+
+        with pytest.raises(ApiError):
+            link_all_repos(
+                integration_key=self.key,
+                integration_id=self.integration.id,
+                organization_id=self.organization.id,
+            )
+            mock_metrics.incr.assert_called_with("github.link_all_repos.api_error")
+
+    @patch("sentry.integrations.github.integration.metrics")
+    @responses.activate
+    def test_link_all_repos_api_error_rate_limited(self, mock_metrics, _):
+        responses.add(
+            responses.POST,
+            self.base_url + f"/app/installations/{self.installation_id}/access_tokens",
+            json={"token": self.access_token, "expires_at": self.expires_at},
+        )
+        responses.add(
+            responses.GET,
+            self.base_url + "/installation/repositories?per_page=100",
+            status=400,
+            json={
+                "message": "API rate limit exceeded",
+                "documentation_url": "https://docs.github.com/rest/overview/resources-in-the-rest-api#rate-limiting",
+            },
+        )
+
+        link_all_repos(
+            integration_key=self.key,
+            integration_id=self.integration.id,
+            organization_id=self.organization.id,
+        )
+        mock_metrics.incr.assert_called_with("github.link_all_repos.rate_limited_error")
+
+    @patch("sentry_sdk.capture_exception")
+    @patch("sentry.models.Repository.objects.create")
+    @responses.activate
+    def test_link_all_repos_repo_creation_error(self, mock_repo, mock_capture_exception, _):
+        mock_repo.side_effect = IntegrityError
+
+        self._add_responses()
+
+        link_all_repos(
+            integration_key=self.key,
+            integration_id=self.integration.id,
+            organization_id=self.organization.id,
+        )
+
+        assert mock_capture_exception.called