Browse Source

feat(codeowners): Use relay's Rust regex engine to find path matches (#43556)

NisanthanNanthakumar 2 years ago
parent
commit
fd643cba9f

+ 1 - 1
requirements-base.txt

@@ -55,7 +55,7 @@ rfc3339-validator>=0.1.2
 rfc3986-validator>=0.1.1
 # [end] jsonschema format validators
 sentry-arroyo>=2.5.0
-sentry-relay>=0.8.16
+sentry-relay>=0.8.17
 sentry-sdk>=1.14.0
 snuba-sdk>=1.0.3
 simplejson>=3.17.6

+ 1 - 1
requirements-dev-frozen.txt

@@ -155,7 +155,7 @@ rsa==4.8
 s3transfer==0.5.2
 selenium==4.3.0
 sentry-arroyo==2.5.0
-sentry-relay==0.8.16
+sentry-relay==0.8.17
 sentry-sdk==1.14.0
 simplejson==3.17.6
 six==1.16.0

+ 1 - 1
requirements-frozen.txt

@@ -108,7 +108,7 @@ rsa==4.8
 s3transfer==0.5.2
 selenium==4.3.0
 sentry-arroyo==2.5.0
-sentry-relay==0.8.16
+sentry-relay==0.8.17
 sentry-sdk==1.14.0
 simplejson==3.17.6
 six==1.16.0

+ 1 - 0
src/sentry/analytics/events/__init__.py

@@ -29,6 +29,7 @@ from .issue_assigned import *  # noqa: F401,F403
 from .issue_deleted import *  # noqa: F401,F403
 from .issue_ignored import *  # noqa: F401,F403
 from .issue_mark_reviewed import *  # noqa: F401,F403
+from .issue_owners_time_durations import *  # noqa: F401,F403
 from .issue_resolved import *  # noqa: F401,F403
 from .issue_tracker_used import *  # noqa: F401,F403
 from .issue_unignored import *  # noqa: F401,F403

+ 16 - 0
src/sentry/analytics/events/issue_owners_time_durations.py

@@ -0,0 +1,16 @@
+from sentry import analytics
+
+
+class IssueOwnersTimeDurationsEvent(analytics.Event):
+    type = "issue_owners.time_durations"
+
+    attributes = (
+        analytics.Attribute("group_id"),
+        analytics.Attribute("event_id"),
+        analytics.Attribute("project_id"),
+        analytics.Attribute("baseline_duration", type=int),
+        analytics.Attribute("experiment_duration", type=int),
+    )
+
+
+analytics.register(IssueOwnersTimeDurationsEvent)

+ 2 - 0
src/sentry/conf/server.py

@@ -1120,6 +1120,8 @@ SENTRY_FEATURES = {
     "organizations:enterprise-perf": False,
     # Enable the API to importing CODEOWNERS for a project
     "organizations:integrations-codeowners": False,
+    # Enable fast CODEOWNERS path matching
+    "organizations:scaleable_codeowners_search": False,
     # Enable inviting members to organizations.
     "organizations:invite-members": True,
     # Enable rate limits for inviting members.

+ 1 - 0
src/sentry/features/__init__.py

@@ -148,6 +148,7 @@ default_manager.add("organizations:reprocessing-v2", OrganizationFeature)
 default_manager.add("organizations:required-email-verification", OrganizationFeature, True)
 default_manager.add("organizations:rule-page", OrganizationFeature)
 default_manager.add("organizations:sandbox-kill-switch", OrganizationFeature, True)
+default_manager.add("organizations:scaleable_codeowners_search", OrganizationFeature)
 default_manager.add("organizations:scim-orgmember-roles", OrganizationFeature, True)
 default_manager.add("organizations:scim-team-roles", OrganizationFeature, True)
 default_manager.add("organizations:sentry-functions", OrganizationFeature, False)

+ 23 - 7
src/sentry/models/projectownership.py

@@ -15,12 +15,23 @@ from sentry.utils import metrics
 from sentry.utils.cache import cache
 
 if TYPE_CHECKING:
-    from sentry.models import Team
+    from sentry.models import ProjectCodeOwners, Team
     from sentry.services.hybrid_cloud.user import APIUser
 
 READ_CACHE_DURATION = 3600
 
 
+def get_duration(func):
+    def wrapper(*args, **kwargs):
+        start_time = timezone.now()
+        result = func(*args, **kwargs)
+        end_time = timezone.now()
+        duration = end_time - start_time
+        return result, duration.total_seconds()
+
+    return wrapper
+
+
 @region_silo_only_model
 class ProjectOwnership(Model):
     __include_in_export__ = True
@@ -117,7 +128,7 @@ class ProjectOwnership(Model):
         codeowners = ProjectCodeOwners.get_codeowners_cached(project_id)
         ownership.schema = cls.get_combined_schema(ownership, codeowners)
 
-        rules = cls._matching_ownership_rules(ownership, project_id, data)
+        rules = cls._matching_ownership_rules(ownership, data)
 
         if not rules:
             project = Project.objects.get(id=project_id)
@@ -163,8 +174,9 @@ class ProjectOwnership(Model):
         return result
 
     @classmethod
+    @get_duration
     def get_issue_owners(
-        cls, project_id, data, limit=2
+        cls, project_id, data, limit=2, experiment=False
     ) -> Sequence[
         Tuple[
             "Rule",
@@ -190,9 +202,9 @@ class ProjectOwnership(Model):
             if not ownership:
                 ownership = cls(project_id=project_id)
 
-            ownership_rules = cls._matching_ownership_rules(ownership, project_id, data)
+            ownership_rules = cls._matching_ownership_rules(ownership, data, experiment)
             codeowners_rules = (
-                cls._matching_ownership_rules(codeowners, project_id, data) if codeowners else []
+                cls._matching_ownership_rules(codeowners, data, experiment) if codeowners else []
             )
 
             if not (codeowners_rules or ownership_rules):
@@ -293,12 +305,16 @@ class ProjectOwnership(Model):
 
     @classmethod
     def _matching_ownership_rules(
-        cls, ownership: "ProjectOwnership", project_id: int, data: Mapping[str, Any]
+        cls,
+        ownership: Union["ProjectOwnership", "ProjectCodeOwners"],
+        data: Mapping[str, Any],
+        experiment: bool = False,
     ) -> Sequence["Rule"]:
         rules = []
+
         if ownership.schema is not None:
             for rule in load_schema(ownership.schema):
-                if rule.test(data):
+                if rule.test(data, experiment):
                     rules.append(rule)
 
         return rules

+ 11 - 11
src/sentry/ownership/grammar.py

@@ -14,6 +14,7 @@ from rest_framework.serializers import ValidationError
 
 from sentry.eventstore.models import EventSubjectTemplateData
 from sentry.models import ActorTuple, RepositoryProjectPathConfig
+from sentry.utils.codeowners import codeowners_match
 from sentry.utils.event_frames import find_stack_frames, get_sdk_name, munged_filename_and_frames
 from sentry.utils.glob import glob_match
 from sentry.utils.safe import PathSearchable, get_path
@@ -85,8 +86,8 @@ class Rule(namedtuple("Rule", "matcher owners")):
     def load(cls, data: Mapping[str, Any]) -> Rule:
         return cls(Matcher.load(data["matcher"]), [Owner.load(o) for o in data["owners"]])
 
-    def test(self, data: Mapping[str, Any]) -> Union[bool, Any]:
-        return self.matcher.test(data)
+    def test(self, data: Mapping[str, Any], experiment: bool = False) -> Union[bool, Any]:
+        return self.matcher.test(data, experiment)
 
 
 class Matcher(namedtuple("Matcher", "type pattern")):
@@ -128,10 +129,7 @@ class Matcher(namedtuple("Matcher", "type pattern")):
 
         return frames, keys
 
-    def test(
-        self,
-        data: PathSearchable,
-    ) -> bool:
+    def test(self, data: PathSearchable, experiment: bool = False) -> bool:
         if self.type == URL:
             return self.test_url(data)
         elif self.type == PATH:
@@ -141,17 +139,19 @@ class Matcher(namedtuple("Matcher", "type pattern")):
         elif self.type.startswith("tags."):
             return self.test_tag(data)
         elif self.type == CODEOWNERS:
+            new_func = lambda val, pattern: bool(codeowners_match(val, pattern))
+            baseline_func = (
+                lambda val, pattern: bool(_path_to_regex(pattern).search(val))
+                if val is not None
+                else False
+            )
             return self.test_frames(
                 *self.munge_if_needed(data),
                 # Codeowners has a slightly different syntax compared to issue owners
                 # As such we need to match it using gitignore logic.
                 # See syntax documentation here:
                 # https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/creating-a-repository-on-github/about-code-owners
-                match_frame_value_func=lambda val, pattern: bool(
-                    _path_to_regex(pattern).search(val)
-                )
-                if val is not None
-                else False,
+                match_frame_value_func=new_func if experiment else baseline_func,
             )
         return False
 

+ 28 - 3
src/sentry/tasks/post_process.py

@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import logging
+import random
 from datetime import timedelta
 from typing import TYPE_CHECKING, List, Mapping, Optional, Sequence, Tuple, TypedDict, Union
 
@@ -8,7 +9,7 @@ import sentry_sdk
 from django.conf import settings
 from django.utils import timezone
 
-from sentry import features
+from sentry import analytics, features
 from sentry.exceptions import PluginError
 from sentry.killswitches import killswitch_matches_context
 from sentry.signals import event_processed, issue_unignored, transaction_processed
@@ -167,10 +168,34 @@ def handle_owner_assignment(job):
                         # see ProjectOwnership.get_issue_owners
                         issue_owners = []
                     else:
-                        issue_owners = ProjectOwnership.get_issue_owners(
-                            group.project_id, event.data
+
+                        issue_owners, baseline_duration = ProjectOwnership.get_issue_owners(
+                            project.id, event.data
                         )
 
+                        should_sample = random.randint(1, 10) % 10 == 0
+                        if (
+                            features.has(
+                                "organizations:scaleable_codeowners_search",
+                                project.organization,
+                                actor=None,
+                            )
+                            and should_sample
+                        ):
+
+                            _, experiment_duration = ProjectOwnership.get_issue_owners(
+                                project.id, event.data, experiment=True
+                            )
+
+                            analytics.record(
+                                "issue_owners.time_durations",
+                                group_id=group.id,
+                                project_id=project.id,
+                                event_id=event.event_id,
+                                baseline_duration=baseline_duration,
+                                experiment_duration=experiment_duration,
+                            )
+
                 with sentry_sdk.start_span(
                     op="post_process.handle_owner_assignment.handle_group_owners"
                 ):

Some files were not shown because too many files changed in this diff