Browse Source

fix: downgrade unidiff to fix patches to filenames with spaces (#34691)

anthony sottile 2 years ago
parent
commit
0151a14ab0

+ 3 - 0
mypy.ini

@@ -95,6 +95,7 @@ files = src/sentry/analytics/,
         src/sentry/utils/jwt.py,
         src/sentry/utils/kvstore,
         src/sentry/utils/outcomes.py,
+        src/sentry/utils/patch_set.py,
         src/sentry/utils/services.py,
         src/sentry/utils/time_window.py,
         src/sentry/web/decorators.py,
@@ -148,6 +149,8 @@ ignore_missing_imports = True
 ignore_missing_imports = True
 [mypy-toronado]
 ignore_missing_imports = True
+[mypy-unidiff]
+ignore_missing_imports = True
 [mypy-zstandard]
 ignore_missing_imports = True
 [mypy-msgpack]

+ 1 - 1
requirements-base.txt

@@ -65,7 +65,7 @@ symbolic==8.7.1
 toronado==0.1.0
 typing-extensions==3.10.0.2
 ua-parser==0.10.0
-unidiff==0.7.3
+unidiff==0.7.1
 urllib3[brotli]==1.25.11
 brotlipy==0.7.0
 # See if we can remove LDFLAGS from lib.sh

+ 2 - 18
src/sentry/integrations/bitbucket/client.py

@@ -1,12 +1,11 @@
 import datetime
 from urllib.parse import urlparse
 
-from unidiff import PatchSet
-
 from sentry.integrations.client import ApiClient
 from sentry.integrations.utils import get_query_hash
 from sentry.utils import jwt
 from sentry.utils.http import absolute_uri
+from sentry.utils.patch_set import patch_to_file_changes
 
 BITBUCKET_KEY = f"{urlparse(absolute_uri()).hostname}.bitbucket"
 
@@ -103,26 +102,11 @@ class BitbucketApiClient(ApiClient):
     def delete_hook(self, repo, hook_id):
         return self.delete(path=BitbucketAPIPath.repository_hook.format(repo=repo, uid=hook_id))
 
-    def transform_patchset(self, patch_set):
-        file_changes = []
-        for patched_file in patch_set.added_files:
-            file_changes.append({"path": patched_file.path, "type": "A"})
-
-        for patched_file in patch_set.removed_files:
-            file_changes.append({"path": patched_file.path, "type": "D"})
-
-        for patched_file in patch_set.modified_files:
-            file_changes.append({"path": patched_file.path, "type": "M"})
-
-        return file_changes
-
     def get_commit_filechanges(self, repo, sha):
         resp = self.get(
             BitbucketAPIPath.repository_diff.format(repo=repo, spec=sha), allow_text=True
         )
-        diff_file = resp.text
-        ps = PatchSet.from_string(diff_file)
-        return self.transform_patchset(ps)
+        return patch_to_file_changes(resp.text)
 
     def zip_commit_data(self, repo, commit_list):
         for commit in commit_list:

+ 26 - 0
src/sentry/utils/patch_set.py

@@ -0,0 +1,26 @@
+from __future__ import annotations
+
+from typing import Literal, TypedDict
+
+import unidiff
+
+
+class FileChange(TypedDict):
+    path: str
+    type: Literal["A", "D", "M"]
+
+
+def patch_to_file_changes(patch: str) -> list[FileChange]:
+    patch_set = unidiff.PatchSet.from_string(patch)
+
+    file_changes: list[FileChange] = []
+    for patched_file in patch_set.added_files:
+        file_changes.append({"path": patched_file.path, "type": "A"})
+
+    for patched_file in patch_set.removed_files:
+        file_changes.append({"path": patched_file.path, "type": "D"})
+
+    for patched_file in patch_set.modified_files:
+        file_changes.append({"path": patched_file.path, "type": "M"})
+
+    return file_changes

+ 2 - 19
src/sentry_plugins/bitbucket/client.py

@@ -1,7 +1,7 @@
 from django.conf import settings
 from requests_oauthlib import OAuth1
-from unidiff import PatchSet
 
+from sentry.utils.patch_set import patch_to_file_changes
 from sentry_plugins.client import AuthApiClient
 
 
@@ -55,27 +55,10 @@ class BitbucketClient(AuthApiClient):
     def delete_hook(self, repo, id):
         return self.delete(f"/2.0/repositories/{repo}/hooks/{id}")
 
-    def transform_patchset(self, patch_set):
-        file_changes = []
-        for patched_file in patch_set.added_files:
-            file_changes.append({"path": patched_file.path, "type": "A"})
-
-        for patched_file in patch_set.removed_files:
-            file_changes.append({"path": patched_file.path, "type": "D"})
-
-        for patched_file in patch_set.modified_files:
-            file_changes.append({"path": patched_file.path, "type": "M"})
-
-        return file_changes
-
     def get_commit_filechanges(self, repo, sha):
         # returns unidiff file
-
         resp = self.get(f"/2.0/repositories/{repo}/diff/{sha}", allow_text=True)
-
-        diff_file = resp.text
-        ps = PatchSet.from_string(diff_file)
-        return self.transform_patchset(ps)
+        return patch_to_file_changes(resp.text)
 
     def zip_commit_data(self, repo, commit_list):
         for commit in commit_list:

+ 16 - 0
tests/sentry/utils/test_patch_set.py

@@ -0,0 +1,16 @@
+from sentry.utils.patch_set import patch_to_file_changes
+
+
+def test_filename_containing_spaces():
+    # regression test for https://sentry.io/organizations/sentry/issues/3279066697
+    patch = """\
+diff --git a/has spaces/t.sql b/has spaces/t.sql
+new file mode 100644
+index 0000000..8a9b485
+--- /dev/null
++++ b/has spaces/t.sql
+@@ -0,0 +1 @@
++select * FROM t;
+"""
+    expected = [{"path": "has spaces/t.sql", "type": "A"}]
+    assert patch_to_file_changes(patch) == expected