Browse Source

feat(opsgenie): implement metric alerts (#53800)

Add metric alert functionality to the Opsgenie integration.

Fixes ER-1712
Michelle Fu 1 year ago
parent
commit
2f5ed8cfc1

+ 5 - 1
src/sentry/api/serializers/models/alert_rule_trigger_action.py

@@ -20,6 +20,8 @@ class AlertRuleTriggerActionSerializer(Serializer):
             return "Send a Microsoft Teams notification to " + action.target_display
         elif action.type == action.Type.SENTRY_APP.value:
             return "Send a notification via " + action.target_display
+        elif action.type == action.Type.OPSGENIE.value:
+            return "Send an Opsgenie notification to " + action.target_display
 
     def get_identifier_from_action(self, action):
         if action.type in [
@@ -27,7 +29,9 @@ class AlertRuleTriggerActionSerializer(Serializer):
             AlertRuleTriggerAction.Type.SENTRY_APP.value,
         ]:
             return int(action.target_identifier)
-
+        if action.type == AlertRuleTriggerAction.Type.OPSGENIE.value:
+            # return team ID: opsgenie team IDs are strings
+            return action.target_identifier
         # if an input_channel_id is provided, we flip these to display properly
         return (
             action.target_display if action.target_display is not None else action.target_identifier

+ 1 - 0
src/sentry/api/serializers/rest_framework/notification_action.py

@@ -23,6 +23,7 @@ INTEGRATION_SERVICES = {
     ActionService.PAGERDUTY.value,
     ActionService.SLACK.value,
     ActionService.MSTEAMS.value,
+    ActionService.OPSGENIE.value,
 }
 
 

+ 13 - 0
src/sentry/incidents/action_handlers.py

@@ -170,6 +170,19 @@ class PagerDutyActionHandler(DefaultActionHandler):
         send_incident_alert_notification(self.action, self.incident, metric_value, new_status)
 
 
+@AlertRuleTriggerAction.register_type(
+    "opsgenie",
+    AlertRuleTriggerAction.Type.OPSGENIE,
+    [AlertRuleTriggerAction.TargetType.SPECIFIC],
+    integration_provider="opsgenie",
+)
+class OpsgenieActionHandler(DefaultActionHandler):
+    """Handler registered for Opsgenie trigger actions on metric alerts."""
+
+    def send_alert(self, metric_value: int | float, new_status: IncidentStatus):
+        """
+        Hand this handler's action and incident to the Opsgenie notifier.
+
+        :param metric_value: The metric value passed through to the notification.
+        :param new_status: The incident status passed through to the notification.
+        """
+        # Function-scope import kept from the original; presumably it avoids an
+        # import cycle -- confirm before hoisting it to module level.
+        from sentry.integrations.opsgenie import utils as opsgenie_utils
+
+        opsgenie_utils.send_incident_alert_notification(
+            self.action, self.incident, metric_value, new_status
+        )
+
+
 @AlertRuleTriggerAction.register_type(
     "sentry_app",
     AlertRuleTriggerAction.Type.SENTRY_APP,

+ 11 - 3
src/sentry/incidents/endpoints/organization_alert_rule_available_action_index.py

@@ -9,7 +9,11 @@ from sentry.api.base import region_silo_endpoint
 from sentry.api.exceptions import ResourceDoesNotExist
 from sentry.constants import SentryAppStatus
 from sentry.incidents.endpoints.bases import OrganizationEndpoint
-from sentry.incidents.logic import get_available_action_integrations_for_org, get_pagerduty_services
+from sentry.incidents.logic import (
+    get_available_action_integrations_for_org,
+    get_opsgenie_teams,
+    get_pagerduty_services,
+)
 from sentry.incidents.models import AlertRuleTriggerAction
 from sentry.incidents.serializers import ACTION_TARGET_TYPE_TO_STRING
 from sentry.models import SentryAppInstallation
@@ -23,11 +27,10 @@ def build_action_response(
 
     :param registered_type: One of the registered AlertRuleTriggerAction types.
     :param integration: Optional. The Integration if this action uses a one.
-    :param organization: Optional. If this is a PagerDuty action, we need the organization to look up services.
+    :param organization: Optional. If this is a PagerDuty/Opsgenie action, we need the organization to look up services/teams.
     :param sentry_app: Optional. The SentryApp if this action uses a one.
     :return: The available action object.
     """
-
     action_response = {
         "type": registered_type.slug,
         "allowedTargetTypes": [
@@ -45,6 +48,11 @@ def build_action_response(
                 {"value": id, "label": service_name}
                 for id, service_name in get_pagerduty_services(organization.id, integration.id)
             ]
+        elif registered_type.type == AlertRuleTriggerAction.Type.OPSGENIE:
+            action_response["options"] = [
+                {"value": id, "label": team}
+                for id, team in get_opsgenie_teams(organization.id, integration.id)
+            ]
 
     elif sentry_app_installation:
         action_response["sentryAppName"] = sentry_app_installation.sentry_app.name

+ 37 - 0
src/sentry/incidents/logic.py

@@ -41,6 +41,7 @@ from sentry.search.events.builder import QueryBuilder
 from sentry.search.events.fields import resolve_field
 from sentry.services.hybrid_cloud.app import RpcSentryAppInstallation, app_service
 from sentry.services.hybrid_cloud.integration import RpcIntegration, integration_service
+from sentry.services.hybrid_cloud.integration.model import RpcOrganizationIntegration
 from sentry.shared_integrations.exceptions import DuplicateDisplayNameError
 from sentry.snuba.dataset import Dataset
 from sentry.snuba.entity_subscription import (
@@ -1244,6 +1245,10 @@ def get_target_identifier_display_for_integration(type, target_value, *args, **k
         target_identifier, target_value = get_alert_rule_trigger_action_pagerduty_service(
             target_value, *args, **kwargs
         )
+    elif type == AlertRuleTriggerAction.Type.OPSGENIE.value:
+        target_identifier, target_value = get_alert_rule_trigger_action_opsgenie_team(
+            target_value, *args, **kwargs
+        )
     else:
         raise Exception("Not implemented")
 
@@ -1327,6 +1332,25 @@ def get_alert_rule_trigger_action_pagerduty_service(
     return service["id"], service["service_name"]
 
 
+def get_alert_rule_trigger_action_opsgenie_team(
+    target_value: Optional[str],
+    organization: RpcOrganizationIntegration,
+    integration_id: int,
+    use_async_lookup=False,
+    input_channel_id=None,
+    integrations=None,
+):
+    """
+    Resolve the Opsgenie team targeted by an alert rule trigger action.
+
+    :param target_value: ID of the team to look up.
+    :param organization: Object whose ``id`` selects the organization integration.
+    :param integration_id: ID of the Opsgenie integration.
+    :param use_async_lookup: Not used by this lookup.
+    :param input_channel_id: Not used by this lookup.
+    :param integrations: Not used by this lookup.
+    :return: A ``(team id, team name)`` tuple read from the matching team entry.
+    :raises InvalidTriggerActionError: If no team matches ``target_value``.
+    """
+    from sentry.integrations.opsgenie.utils import get_team
+
+    org_integration = integration_service.get_organization_integration(
+        organization_id=organization.id, integration_id=integration_id
+    )
+    matched_team = get_team(target_value, org_integration)
+    if matched_team:
+        return matched_team["id"], matched_team["team"]
+    raise InvalidTriggerActionError("No Opsgenie team found.")
+
+
 def get_alert_rule_trigger_action_sentry_app(organization, sentry_app_id, installations):
     from sentry.services.hybrid_cloud.app import app_service
 
@@ -1380,6 +1404,19 @@ def get_pagerduty_services(organization_id, integration_id) -> List[Tuple[int, s
     return [(s["id"], s["service_name"]) for s in services]
 
 
+def get_opsgenie_teams(organization_id, integration_id) -> list[Tuple[str, str]]:
+    """
+    List the Opsgenie teams configured on an organization's integration.
+
+    :param organization_id: ID of the organization that owns the integration.
+    :param integration_id: ID of the Opsgenie integration.
+    :return: ``(team id, team name)`` pairs read from the ``team_table`` config
+        entry; an empty list when the organization integration is missing or
+        the table is absent or empty.
+    """
+    organization_integration = integration_service.get_organization_integration(
+        organization_id=organization_id, integration_id=integration_id
+    )
+    if organization_integration is None:
+        return []
+    configured_teams = organization_integration.config.get("team_table")
+    if not configured_teams:
+        return []
+    return [(entry["id"], entry["team"]) for entry in configured_teams]
+
+
 # TODO: This is temporarily needed to support back and forth translations for snuba / frontend.
 # Uses a function from discover to break the aggregate down into parts, and then compare the "field"
 # to a list of accepted fields, or a list of fields we need to translate.

+ 3 - 1
src/sentry/incidents/models.py

@@ -543,7 +543,9 @@ class AlertRuleTriggerAction(AbstractNotificationAction):
 
     _type_registrations = {}
 
-    INTEGRATION_TYPES = frozenset((Type.PAGERDUTY.value, Type.SLACK.value, Type.MSTEAMS.value))
+    INTEGRATION_TYPES = frozenset(
+        (Type.PAGERDUTY.value, Type.SLACK.value, Type.MSTEAMS.value, Type.OPSGENIE.value)
+    )
 
     # ActionService items which are not supported for AlertRuleTriggerActions
     EXEMPT_SERVICES = frozenset((Type.SENTRY_NOTIFICATION.value,))

+ 1 - 1
src/sentry/integrations/metric_alerts.py

@@ -76,7 +76,7 @@ def get_incident_status_text(alert_rule: AlertRule, metric_value: str) -> str:
     return text
 
 
-def incident_attachment_info(incident, new_status: IncidentStatus, metric_value=None):
+def incident_attachment_info(incident: Incident, new_status: IncidentStatus, metric_value=None):
     alert_rule = incident.alert_rule
 
     status = INCIDENT_STATUS[new_status]

+ 14 - 4
src/sentry/integrations/opsgenie/client.py

@@ -92,7 +92,17 @@ class OpsgenieClient(IntegrationProxyClient):
             event = data
             payload = self._get_issue_alert_payload(data, rules, event, group)
         else:
-            # this is for metric alerts, which will be in the next PR
-            pass
-        headers = {"Authorization": "GenieKey " + self.integration_key}
-        return self.post("/alerts", data=payload, headers=headers)
+            # if we're acknowledging the alert—meaning that the Sentry alert was resolved
+            if data.get("identifier"):
+                alias = data["identifier"]
+                resp = self.post(
+                    f"/alerts/{alias}/acknowledge",
+                    data={},
+                    params={"identifierType": "alias"},
+                    headers=headers,
+                )
+                return resp
+            # this is a metric alert
+            payload = data
+        resp = self.post("/alerts", data=payload, headers=headers)
+        return resp

+ 74 - 1
src/sentry/integrations/opsgenie/utils.py

@@ -1,13 +1,86 @@
+import logging
 from typing import Optional
 
+from sentry.constants import ObjectStatus
+from sentry.incidents.models import AlertRuleTriggerAction, Incident, IncidentStatus
+from sentry.integrations.metric_alerts import incident_attachment_info
+from sentry.services.hybrid_cloud.integration import integration_service
 from sentry.services.hybrid_cloud.integration.model import RpcOrganizationIntegration
+from sentry.shared_integrations.exceptions import ApiError
+
+logger = logging.getLogger("sentry.integrations.opsgenie")
+from .client import OpsgenieClient
+
+
+def build_incident_attachment(incident: Incident, new_status: IncidentStatus, metric_value=None):
+    """
+    Build the Opsgenie payload for a metric alert incident.
+
+    :param incident: The incident being reported.
+    :param new_status: The status the incident is moving to.
+    :param metric_value: Optional metric value passed to ``incident_attachment_info``.
+    :return: For ``IncidentStatus.CLOSED``, a dict holding only ``identifier``
+        (the alert alias). Otherwise the alert payload, with priority ``P2`` for
+        ``IncidentStatus.WARNING`` and ``P1`` for any other status.
+    """
+    data = incident_attachment_info(incident, new_status, metric_value)
+    alert_key = f"incident_{incident.organization_id}_{incident.identifier}"
+    if new_status == IncidentStatus.CLOSED:
+        # Return here: falling through would replace this with a create-alert
+        # payload that has no "identifier" key, and the client only
+        # acknowledges an alert when that key is present.
+        return {"identifier": alert_key}
+    priority = "P2" if new_status == IncidentStatus.WARNING else "P1"
+    return {
+        "message": incident.alert_rule.name,
+        "alias": alert_key,
+        "description": data["text"],
+        "source": "Sentry",
+        "priority": priority,
+        "details": {
+            "URL": data["title_link"],  # type: ignore
+        },
+    }
 
 
 def get_team(team_id: Optional[str], org_integration: Optional[RpcOrganizationIntegration]):
+    """
+    Find a team entry in the organization integration's ``team_table`` config.
+
+    :param team_id: ID compared against each entry's ``id``.
+    :param org_integration: Organization integration whose config is searched.
+    :return: The matching team entry, or ``None`` when there is no organization
+        integration, no team table, or no entry with that ID.
+    """
     if not org_integration:
         return None
-    teams = org_integration.config["team_table"]
+    teams = org_integration.config.get("team_table")
+    if not teams:
+        return None
     for team in teams:
         if team["id"] == team_id:
             return team
     return None
+
+
+def send_incident_alert_notification(
+    action: AlertRuleTriggerAction,
+    incident: Incident,
+    metric_value: int,
+    new_status: IncidentStatus,
+) -> None:
+    """
+    Send a metric alert notification to the Opsgenie team targeted by ``action``.
+
+    Logs and returns without sending when the integration is missing or not
+    active, or when the targeted team is no longer in the team table.
+
+    :param action: Trigger action supplying the integration ID and target team ID.
+    :param incident: The incident being reported.
+    :param metric_value: Metric value included in the notification payload.
+    :param new_status: The status the incident is moving to.
+    :raises ApiError: Re-raised after logging when the Opsgenie request fails.
+    """
+    integration, org_integration = integration_service.get_organization_context(
+        organization_id=incident.organization_id, integration_id=action.integration_id
+    )
+    if org_integration is None or integration is None or integration.status != ObjectStatus.ACTIVE:
+        logger.info("Opsgenie integration removed, but the rule is still active.")
+        return
+
+    team = get_team(org_integration=org_integration, team_id=action.target_identifier)
+    if not team:
+        # team removed, but the rule is still active
+        logger.info("Opsgenie team removed, but the rule is still active.")
+        return
+
+    client = OpsgenieClient(
+        integration=integration,
+        integration_key=team["integration_key"],
+        # The parameter names the organization integration, so pass its ID
+        # rather than the organization's ID.
+        org_integration_id=org_integration.id,
+    )
+    attachment = build_incident_attachment(incident, new_status, metric_value)
+    try:
+        client.send_notification(attachment)
+    except ApiError as e:
+        logger.info(
+            "rule.fail.opsgenie_notification",
+            extra={
+                "error": str(e),
+                "team_name": team["team"],
+                "team_id": team["id"],
+                "integration_id": action.integration_id,
+            },
+        )
+        # Bare raise re-raises the active exception with its traceback intact.
+        raise

+ 3 - 0
src/sentry/models/notificationaction.py

@@ -49,6 +49,7 @@ class ActionService(FlexibleIntEnum):
     MSTEAMS = 3
     SENTRY_APP = 4
     SENTRY_NOTIFICATION = 5  # Use personal notification platform (src/sentry/notifications)
+    OPSGENIE = 6
 
     @classmethod
     def as_choices(cls) -> tuple[tuple[int, str], ...]:
@@ -56,6 +57,7 @@ class ActionService(FlexibleIntEnum):
         assert ExternalProviders.PAGERDUTY.name is not None
         assert ExternalProviders.SLACK.name is not None
         assert ExternalProviders.MSTEAMS.name is not None
+        assert ExternalProviders.OPSGENIE.name is not None
         return (
             (cls.EMAIL.value, ExternalProviders.EMAIL.name),
             (cls.PAGERDUTY.value, ExternalProviders.PAGERDUTY.name),
@@ -63,6 +65,7 @@ class ActionService(FlexibleIntEnum):
             (cls.MSTEAMS.value, ExternalProviders.MSTEAMS.name),
             (cls.SENTRY_APP.value, "sentry_app"),
             (cls.SENTRY_NOTIFICATION.value, "sentry_notification"),
+            (cls.OPSGENIE.value, ExternalProviders.OPSGENIE.name),
         )
 
 

Some files were not shown because too many files changed in this diff