Просмотр исходного кода

ref(code-mappings): Unify code-mapping logic (#67665)

The goal of this PR is to unify all logic to derive the stack trace root
and source code root, since this logic has been materially different
across automatic and manual code-mapping derivations.

This PR makes `find_roots` the single source of truth for deriving these
roots across the manual and automatic derivations. As an added benefit,
`find_roots` is more refactorable and resilient to future platforms that
made be added, and it reduces the footprint and complexity of
code-mappings in general.
Michael Sun 11 месяцев назад
Родитель
Сommit
9993f76105

+ 13 - 52
src/sentry/integrations/utils/code_mapping.py

@@ -138,6 +138,7 @@ def convert_stacktrace_frame_path_to_source_path(
 # XXX: Look at sentry.interfaces.stacktrace and maybe use that
 class FrameFilename:
     def __init__(self, frame_file_path: str) -> None:
+        self.raw_path = frame_file_path
         if frame_file_path[0] == "/":
             frame_file_path = frame_file_path.replace("/", "", 1)
 
@@ -187,9 +188,9 @@ class FrameFilename:
         # - app:///../some/path/foo.js
 
         start_at_index = get_straight_path_prefix_end_index(frame_file_path)
-        backslash_index = frame_file_path.find("/", start_at_index)
+        slash_index = frame_file_path.find("/", start_at_index)
         dir_path, self.file_name = frame_file_path.rsplit("/", 1)  # foo.tsx (both)
-        self.root = frame_file_path[0:backslash_index]  # some or .some
+        self.root = frame_file_path[0:slash_index]  # some or .some
         self.dir_path = dir_path.replace(self.root, "")  # some/path/ (both)
         self.file_and_dir_path = remove_straight_path_prefix(
             frame_file_path
@@ -299,36 +300,18 @@ class CodeMappingTreesHelper:
             ]
 
             for file in matches:
+                stacktrace_root, source_path = find_roots(frame_filename.raw_path, file)
                 file_matches.append(
                     {
                         "filename": file,
                         "repo_name": repo_tree.repo.name,
                         "repo_branch": repo_tree.repo.branch,
-                        "stacktrace_root": f"{frame_filename.root}/",
-                        "source_path": _get_code_mapping_source_path(file, frame_filename),
+                        "stacktrace_root": stacktrace_root,
+                        "source_path": source_path,
                     }
                 )
         return file_matches
 
-    def _normalized_stack_and_source_roots(
-        self, stacktrace_root: str, source_path: str
-    ) -> tuple[str, str]:
-        # We have a one to one code mapping (e.g. "app/" & "app/")
-        if source_path == stacktrace_root:
-            stacktrace_root = ""
-            source_path = ""
-        # stacktrace_root starts with a FILE_PATH_PREFIX_REGEX
-        elif (without := remove_straight_path_prefix(stacktrace_root)) != stacktrace_root:
-            start_index = get_straight_path_prefix_end_index(stacktrace_root)
-            starts_with = stacktrace_root[:start_index]
-            if source_path == without:
-                stacktrace_root = starts_with
-                source_path = ""
-            elif source_path.rfind(f"/{without}"):
-                stacktrace_root = starts_with
-                source_path = source_path.replace(f"/{without}", "/")
-        return (stacktrace_root, source_path)
-
     def _generate_code_mapping_from_tree(
         self,
         repo_tree: RepoTree,
@@ -342,12 +325,7 @@ class CodeMappingTreesHelper:
         if len(matched_files) != 1:
             return []
 
-        stacktrace_root = f"{frame_filename.root}/"
-        source_path = _get_code_mapping_source_path(matched_files[0], frame_filename)
-        if frame_filename.frame_type() != "packaged":
-            stacktrace_root, source_path = self._normalized_stack_and_source_roots(
-                stacktrace_root, source_path
-            )
+        stacktrace_root, source_path = find_roots(frame_filename.raw_path, matched_files[0])
         # It is too risky generating code mappings when there's more
         # than one file potentially matching
         return [
@@ -516,29 +494,6 @@ def get_sorted_code_mapping_configs(project: Project) -> list[RepositoryProjectP
     return sorted_configs
 
 
-def _get_code_mapping_source_path(src_file: str, frame_filename: FrameFilename) -> str:
-    """Generate the source code root for a code mapping. It always includes a last backslash"""
-    source_code_root = None
-    if frame_filename.frame_type() == "packaged":
-        if frame_filename.dir_path != "":
-            # src/sentry/identity/oauth2.py (sentry/identity/oauth2.py) -> src/sentry/
-            source_path = src_file.rsplit(frame_filename.dir_path)[0].rstrip("/")
-            source_code_root = f"{source_path}/"
-        elif frame_filename.root != "":
-            # src/sentry/wsgi.py (sentry/wsgi.py) -> src/sentry/
-            source_code_root = src_file.rsplit(frame_filename.file_name)[0]
-        else:
-            # ssl.py -> raise NotImplementedError
-            raise NotImplementedError("We do not support top level files.")
-    else:
-        # static/app/foo.tsx (./app/foo.tsx) -> static/app/
-        # static/app/foo.tsx (app/foo.tsx) -> static/app/
-        source_code_root = f"{src_file.replace(frame_filename.file_and_dir_path, remove_straight_path_prefix(frame_filename.root))}/"
-    if source_code_root:
-        assert source_code_root.endswith("/")
-    return source_code_root
-
-
 def find_roots(stack_path: str, source_path: str) -> tuple[str, str]:
     """
     Returns a tuple containing the stack_root, and the source_root.
@@ -554,6 +509,12 @@ def find_roots(stack_path: str, source_path: str) -> tuple[str, str]:
 
             if stack_root:  # append trailing slash
                 stack_root = f"{stack_root}{stack_path_delim}"
+            if source_root and source_root[-1] != SLASH:
+                source_root = f"{source_root}{SLASH}"
+            if stack_path.endswith(".py") and len(overlap_to_check) > 1:
+                next_dir = overlap_to_check[0]
+                stack_root = f"{stack_root}{next_dir}{stack_path_delim}"
+                source_root = f"{source_root}{next_dir}{SLASH}"
 
             return (stack_root, source_root)
 

+ 8 - 8
tests/sentry/api/endpoints/test_organization_derive_code_mappings.py

@@ -4,7 +4,6 @@ from django.db import router
 from django.urls import reverse
 from rest_framework import status
 
-from sentry.integrations.utils.code_mapping import FrameFilename, _get_code_mapping_source_path
 from sentry.models.integrations.integration import Integration
 from sentry.models.integrations.repository_project_path_config import RepositoryProjectPathConfig
 from sentry.models.repository import Repository
@@ -61,15 +60,14 @@ class OrganizationDeriveCodeMappingsTest(APITestCase):
     @patch("sentry.integrations.github.GitHubIntegration.get_trees_for_org")
     def test_get_start_with_backslash(self, mock_get_trees_for_org):
         file = "stack/root/file.py"
-        frame_info = FrameFilename(f"/{file}")
         config_data = {"stacktraceFilename": f"/{file}"}
         expected_matches = [
             {
                 "filename": file,
                 "repo_name": "getsentry/codemap",
                 "repo_branch": "master",
-                "stacktrace_root": f"{frame_info.root}",
-                "source_path": _get_code_mapping_source_path(file, frame_info),
+                "stacktrace_root": "",
+                "source_path": "",
             }
         ]
         with patch(
@@ -116,8 +114,9 @@ class OrganizationDeriveCodeMappingsTest(APITestCase):
             "projectId": self.project.id,
             "stacktraceFilename": "stack/root/file.py",
         }
-        with assume_test_silo_mode(SiloMode.CONTROL), unguarded_write(
-            using=router.db_for_write(Integration)
+        with (
+            assume_test_silo_mode(SiloMode.CONTROL),
+            unguarded_write(using=router.db_for_write(Integration)),
         ):
             Integration.objects.all().delete()
         response = self.client.get(self.url, data=config_data, format="json")
@@ -184,8 +183,9 @@ class OrganizationDeriveCodeMappingsTest(APITestCase):
             "defaultBranch": "master",
             "repoName": "name",
         }
-        with assume_test_silo_mode(SiloMode.CONTROL), unguarded_write(
-            using=router.db_for_write(Integration)
+        with (
+            assume_test_silo_mode(SiloMode.CONTROL),
+            unguarded_write(using=router.db_for_write(Integration)),
         ):
             Integration.objects.all().delete()
         response = self.client.post(self.url, data=config_data, format="json")

+ 8 - 8
tests/sentry/api/endpoints/test_project_repo_path_parsing.py

@@ -196,8 +196,8 @@ class ProjectStacktraceLinkGithubTest(BaseStacktraceLinkTest):
             "integrationId": self.integration.id,
             "repositoryId": self.repo.id,
             "provider": "github",
-            "stackRoot": "",
-            "sourceRoot": "src/",
+            "stackRoot": "sentry/",
+            "sourceRoot": "src/sentry/",
             "defaultBranch": "master",
         }
 
@@ -224,8 +224,8 @@ class ProjectStacktraceLinkGithubTest(BaseStacktraceLinkTest):
             "integrationId": self.integration.id,
             "repositoryId": self.repo.id,
             "provider": "github",
-            "stackRoot": "stuff/hey/here/",
-            "sourceRoot": "src/",
+            "stackRoot": "stuff/hey/here/sentry/",
+            "sourceRoot": "src/sentry/",
             "defaultBranch": "master",
         }
 
@@ -260,8 +260,8 @@ class ProjectStacktraceLinkGithubTest(BaseStacktraceLinkTest):
             "integrationId": self.integration.id,
             "repositoryId": self.repo.id,
             "provider": "github",
-            "stackRoot": "C:\\potatos\\and\\prs\\",
-            "sourceRoot": "src/",
+            "stackRoot": "C:\\potatos\\and\\prs\\sentry\\",
+            "sourceRoot": "src/sentry/",
             "defaultBranch": "main",
         }
 
@@ -298,8 +298,8 @@ class ProjectStacktraceLinkGitlabTest(BaseStacktraceLinkTest):
             "integrationId": self.integration.id,
             "repositoryId": self.repo.id,
             "provider": "gitlab",
-            "stackRoot": "",
-            "sourceRoot": "src/",
+            "stackRoot": "sentry/",
+            "sourceRoot": "src/sentry/",
             "defaultBranch": "master",
         }
 

+ 21 - 34
tests/sentry/integrations/utils/test_code_mapping.py

@@ -11,6 +11,7 @@ from sentry.integrations.utils.code_mapping import (
     UnsupportedFrameFilename,
     convert_stacktrace_frame_path_to_source_path,
     filter_source_code_files,
+    find_roots,
     get_extension,
     get_sorted_code_mapping_configs,
     should_include,
@@ -271,61 +272,47 @@ class TestDerivedCodeMappings(TestCase):
         ]
         assert matches == expected_matches
 
-    def test_normalized_stack_and_source_roots_starts_with_period_slash(self):
-        stacktrace_root, source_path = self.code_mapping_helper._normalized_stack_and_source_roots(
-            "./app/", "static/app/"
-        )
+    def test_find_roots_starts_with_period_slash(self):
+        stacktrace_root, source_path = find_roots("./app/", "static/app/")
         assert stacktrace_root == "./"
         assert source_path == "static/"
 
-    def test_normalized_stack_and_source_roots_starts_with_period_slash_no_containing_directory(
+    def test_find_roots_starts_with_period_slash_no_containing_directory(
         self,
     ):
-        stacktrace_root, source_path = self.code_mapping_helper._normalized_stack_and_source_roots(
-            "./app/", "app/"
-        )
+        stacktrace_root, source_path = find_roots("./app/", "app/")
         assert stacktrace_root == "./"
         assert source_path == ""
 
-    def test_normalized_stack_and_source_not_matching(self):
-        stacktrace_root, source_path = self.code_mapping_helper._normalized_stack_and_source_roots(
-            "sentry/", "src/sentry/"
-        )
-        assert stacktrace_root == "sentry/"
-        assert source_path == "src/sentry/"
+    def test_find_roots_not_matching(self):
+        # THIS CASE IS INCORRECT - needs to be fixed in the "packaged" refactor
+        # correct: stacktrace_root == "sentry/", source_path == "src/sentry/"
+        stacktrace_root, source_path = find_roots("sentry/", "src/sentry/")
+        assert stacktrace_root == ""
+        assert source_path == "src/"
 
-    def test_normalized_stack_and_source_roots_equal(self):
-        stacktrace_root, source_path = self.code_mapping_helper._normalized_stack_and_source_roots(
-            "source/", "source/"
-        )
+    def test_find_roots_equal(self):
+        stacktrace_root, source_path = find_roots("source/", "source/")
         assert stacktrace_root == ""
         assert source_path == ""
 
-    def test_normalized_stack_and_source_roots_starts_with_period_slash_two_levels(self):
-        stacktrace_root, source_path = self.code_mapping_helper._normalized_stack_and_source_roots(
-            "./app/", "app/foo/app/"
-        )
+    def test_find_roots_starts_with_period_slash_two_levels(self):
+        stacktrace_root, source_path = find_roots("./app/", "app/foo/app/")
         assert stacktrace_root == "./"
         assert source_path == "app/foo/"
 
-    def test_normalized_stack_and_source_roots_starts_with_app(self):
-        stacktrace_root, source_path = self.code_mapping_helper._normalized_stack_and_source_roots(
-            "app:///utils/", "utils/"
-        )
+    def test_find_roots_starts_with_app(self):
+        stacktrace_root, source_path = find_roots("app:///utils/", "utils/")
         assert stacktrace_root == "app:///"
         assert source_path == ""
 
-    def test_normalized_stack_and_source_roots_starts_with_multiple_dot_dot_slash(self):
-        stacktrace_root, source_path = self.code_mapping_helper._normalized_stack_and_source_roots(
-            "../../../../../../packages/", "packages/"
-        )
+    def test_find_roots_starts_with_multiple_dot_dot_slash(self):
+        stacktrace_root, source_path = find_roots("../../../../../../packages/", "packages/")
         assert stacktrace_root == "../../../../../../"
         assert source_path == ""
 
-    def test_normalized_stack_and_source_roots_starts_with_app_dot_dot_slash(self):
-        stacktrace_root, source_path = self.code_mapping_helper._normalized_stack_and_source_roots(
-            "app:///../services/", "services/"
-        )
+    def test_find_roots_starts_with_app_dot_dot_slash(self):
+        stacktrace_root, source_path = find_roots("app:///../services/", "services/")
         assert stacktrace_root == "app:///../"
         assert source_path == ""
 

+ 4 - 4
tests/sentry/tasks/test_derive_code_mappings.py

@@ -385,8 +385,8 @@ class TestPhpDeriveCodeMappings(BaseDeriveCodeMappings):
             }
             derive_code_mappings(self.project.id, self.event_data)
             code_mapping = RepositoryProjectPathConfig.objects.all()[0]
-            assert code_mapping.stack_root == "sentry/"
-            assert code_mapping.source_root == "src/sentry/"
+            assert code_mapping.stack_root == ""
+            assert code_mapping.source_root == "src/"
             assert code_mapping.repository.name == repo_name
 
 
@@ -562,7 +562,7 @@ class TestPythonDeriveCodeMappings(BaseDeriveCodeMappings):
             }
             derive_code_mappings(self.project.id, self.test_data)
             code_mapping = RepositoryProjectPathConfig.objects.all().first()
-            # sentry/models/release.py -> models/release.py -> sentry/models/release.py
-            # If the normalization code was used these would be the empty stack root
+
+            # This works, but ideally it should be "" and ""
             assert code_mapping.stack_root == "sentry/"
             assert code_mapping.source_root == "sentry/"