2 лет назад · 321676d744
--- a/mypy.ini
+++ b/mypy.ini
@@ -88,6 +88,7 @@ files = src/sentry/analytics/,
 
				         src/sentry/utils/appleconnect/,
			
 
				         src/sentry/utils/avatar.py,
			
 
				         src/sentry/utils/codecs.py,
			
 
				+        src/sentry/utils/committers.py,
			
 
				         src/sentry/utils/dates.py,
			
 
				         src/sentry/utils/email/,
			
 
				         src/sentry/utils/jwt.py,
			
--- a/src/sentry/api/serializers/models/commit.py
+++ b/src/sentry/api/serializers/models/commit.py
@@ -1,11 +1,12 @@
 
				 from collections import defaultdict
			
 
				+from typing import Mapping
			
 
				 
			
 
				 from sentry.api.serializers import Serializer, register, serialize
			
 
				-from sentry.api.serializers.models.release import CommitAuthor, get_users_for_authors
			
 
				+from sentry.api.serializers.models.release import Author, CommitAuthor, get_users_for_authors
			
 
				 from sentry.models import Commit, Repository
			
 
				 
			
 
				 
			
 
				-def get_users_for_commits(item_list, user=None):
			
 
				+def get_users_for_commits(item_list, user=None) -> Mapping[str, Author]:
			
 
				     authors = list(
			
 
				         CommitAuthor.objects.get_many_from_cache([i.author_id for i in item_list if i.author_id])
			
 
				     )
			
--- a/src/sentry/api/serializers/models/release.py
+++ b/src/sentry/api/serializers/models/release.py
@@ -1,10 +1,12 @@
 
				 from collections import defaultdict
			
 
				+from typing import Mapping, Sequence, TypedDict, Union
			
 
				 
			
 
				 from django.core.cache import cache
			
 
				 from django.db.models import Sum
			
 
				 
			
 
				 from sentry import release_health, tagstore
			
 
				 from sentry.api.serializers import Serializer, register, serialize
			
 
				+from sentry.api.serializers.models.user import UserSerializerResponse
			
 
				 from sentry.db.models.query import in_iexact
			
 
				 from sentry.models import (
			
 
				     Commit,
			
@@ -51,7 +53,15 @@ def _user_to_author_cache_key(organization_id, author):
 
				     return f"get_users_for_authors:{organization_id}:{author_hash}"
			
 
				 
			
 
				 
			
 
				-def get_users_for_authors(organization_id, authors, user=None):
			
 
				+class NonMappableUser(TypedDict):
			
 
				+    name: str
			
 
				+    email: str
			
 
				+
			
 
				+
			
 
				+Author = Union[UserSerializerResponse, NonMappableUser]
			
 
				+
			
 
				+
			
 
				+def get_users_for_authors(organization_id, authors, user=None) -> Mapping[str, Author]:
			
 
				     """
			
 
				     Returns a dictionary of author_id => user, if a Sentry
			
 
				     user object exists for that email. If there is no matching
			
@@ -59,8 +69,9 @@ def get_users_for_authors(organization_id, authors, user=None):
 
				     author is returned.
			
 
				     e.g.
			
 
				     {
			
 
				-        1: serialized(<User id=1>),
			
 
				-        2: {email: 'not-a-user@example.com', name: 'dunno'},
			
 
				+        '<author-id-1>': serialized(<User id=1, ...>),
			
 
				+        '<author-id-2>': {'email': 'not-a-user@example.com', 'name': 'dunno'},
			
 
				+        '<author-id-3>': serialized(<User id=3, ...>),
			
 
				         ...
			
 
				     }
			
 
				     """
			
@@ -93,7 +104,7 @@ def get_users_for_authors(organization_id, authors, user=None):
 
				             is_active=True,
			
 
				             sentry_orgmember_set__organization_id=organization_id,
			
 
				         )
			
 
				-        users = serialize(list(users), user)
			
 
				+        users: Sequence[UserSerializerResponse] = serialize(list(users), user)
			
 
				         users_by_id = {user["id"]: user for user in users}
			
 
				         # Figure out which email address matches to a user
			
 
				         users_by_email = {}
			
--- a/src/sentry/notifications/utils/__init__.py
+++ b/src/sentry/notifications/utils/__init__.py
@@ -218,7 +218,7 @@ def get_commits(project: Project, event: Event) -> Sequence[Mapping[str, Any]]:
 
				         for committer in committers:
			
 
				             for commit in committer["commits"]:
			
 
				                 if commit["id"] not in commits:
			
 
				-                    commit_data = commit.copy()
			
 
				+                    commit_data = dict(commit)
			
 
				                     commit_data["shortId"] = commit_data["id"][:7]
			
 
				                     commit_data["author"] = committer["author"]
			
 
				                     commit_data["subject"] = commit_data["message"].split("\n", 1)[0]
			
--- a/src/sentry/ownership/grammar.py
+++ b/src/sentry/ownership/grammar.py
@@ -27,7 +27,7 @@ from rest_framework.serializers import ValidationError
 
				 from sentry.eventstore.models import EventSubjectTemplateData
			
 
				 from sentry.models import ActorTuple, RepositoryProjectPathConfig
			
 
				 from sentry.utils.glob import glob_match
			
 
				-from sentry.utils.safe import get_path
			
 
				+from sentry.utils.safe import PathSearchable, get_path
			
 
				 
			
 
				 __all__ = ("parse_rules", "dump_schema", "load_schema")
			
 
				 
			
@@ -75,9 +75,6 @@ _       = space*
 
				 )
			
 
				 
			
 
				 
			
 
				-PathSearchable = Union[Mapping[str, Any], Sequence[Any]]
			
 
				-
			
 
				-
			
 
				 class Rule(namedtuple("Rule", "matcher owners")):
			
 
				     """
			
 
				     A Rule represents a single line in an Ownership file.
			
--- a/src/sentry/tasks/groupowner.py
+++ b/src/sentry/tasks/groupowner.py
@@ -54,7 +54,7 @@ def _process_suspect_commits(
 
				                 )
			
 
				             owner_scores = {}
			
 
				             for committer in committers:
			
 
				-                if "id" in committer["author"]:
			
 
				+                if committer["author"] and "id" in committer["author"]:
			
 
				                     author_id = committer["author"]["id"]
			
 
				                     for commit, score in committer["commits"]:
			
 
				                         if score >= MIN_COMMIT_SCORE:
			
--- a/src/sentry/utils/committers.py
+++ b/src/sentry/utils/committers.py
@@ -1,22 +1,36 @@
 
				 import operator
			
 
				 from collections import defaultdict
			
 
				 from functools import reduce
			
 
				+from typing import (
			
 
				+    Any,
			
 
				+    Iterator,
			
 
				+    List,
			
 
				+    Mapping,
			
 
				+    MutableMapping,
			
 
				+    Sequence,
			
 
				+    Set,
			
 
				+    Tuple,
			
 
				+    TypedDict,
			
 
				+    Union,
			
 
				+)
			
 
				 
			
 
				 from django.core.cache import cache
			
 
				 from django.db.models import Q
			
 
				 
			
 
				 from sentry.api.serializers import serialize
			
 
				 from sentry.api.serializers.models.commit import CommitSerializer, get_users_for_commits
			
 
				-from sentry.models import Commit, CommitFileChange, Group, Release, ReleaseCommit
			
 
				+from sentry.api.serializers.models.release import Author
			
 
				+from sentry.eventstore.models import Event
			
 
				+from sentry.models import Commit, CommitFileChange, Group, Project, Release, ReleaseCommit
			
 
				 from sentry.utils import metrics
			
 
				 from sentry.utils.compat import zip
			
 
				 from sentry.utils.hashlib import hash_values
			
 
				-from sentry.utils.safe import get_path
			
 
				+from sentry.utils.safe import PathSearchable, get_path
			
 
				 
			
 
				 PATH_SEPARATORS = frozenset(["/", "\\"])
			
 
				 
			
 
				 
			
 
				-def tokenize_path(path):
			
 
				+def tokenize_path(path: str) -> Iterator[str]:
			
 
				     for sep in PATH_SEPARATORS:
			
 
				         if sep in path:
			
 
				             # Exclude empty path segments as some repository integrations
			
@@ -26,7 +40,7 @@ def tokenize_path(path):
 
				         return iter([path])
			
 
				 
			
 
				 
			
 
				-def score_path_match_length(path_a, path_b):
			
 
				+def score_path_match_length(path_a: str, path_b: str) -> int:
			
 
				     score = 0
			
 
				     for a, b in zip(tokenize_path(path_a), tokenize_path(path_b)):
			
 
				         if a.lower() != b.lower():
			
@@ -35,7 +49,7 @@ def score_path_match_length(path_a, path_b):
 
				     return score
			
 
				 
			
 
				 
			
 
				-def get_frame_paths(data):
			
 
				+def get_frame_paths(data: PathSearchable) -> Union[Any, Sequence[Any]]:
			
 
				     frames = get_path(data, "stacktrace", "frames", filter=True)
			
 
				     if frames:
			
 
				         return frames
			
@@ -43,11 +57,11 @@ def get_frame_paths(data):
 
				     return get_path(data, "exception", "values", 0, "stacktrace", "frames", filter=True) or []
			
 
				 
			
 
				 
			
 
				-def release_cache_key(release):
			
 
				+def release_cache_key(release: Release) -> str:
			
 
				     return f"release_commits:{release.id}"
			
 
				 
			
 
				 
			
 
				-def _get_commits(releases):
			
 
				+def _get_commits(releases: Sequence[Release]) -> Sequence[Commit]:
			
 
				     commits = []
			
 
				 
			
 
				     fetched = cache.get_many([release_cache_key(release) for release in releases])
			
@@ -60,7 +74,7 @@ def _get_commits(releases):
 
				             else:
			
 
				                 commits += [c for c in cached_commits if c not in commits]
			
 
				     else:
			
 
				-        missed = releases
			
 
				+        missed = list(releases)
			
 
				 
			
 
				     if missed:
			
 
				         release_commits = ReleaseCommit.objects.filter(release__in=missed).select_related(
			
@@ -76,7 +90,9 @@ def _get_commits(releases):
 
				     return commits
			
 
				 
			
 
				 
			
 
				-def _get_commit_file_changes(commits, path_name_set):
			
 
				+def _get_commit_file_changes(
			
 
				+    commits: Sequence[Commit], path_name_set: Set[str]
			
 
				+) -> Sequence[CommitFileChange]:
			
 
				     # Get distinct file names and bail if there are no files.
			
 
				     filenames = {next(tokenize_path(path), None) for path in path_name_set}
			
 
				     filenames = {path for path in filenames if path is not None}
			
@@ -91,9 +107,11 @@ def _get_commit_file_changes(commits, path_name_set):
 
				     return list(commit_file_change_matches)
			
 
				 
			
 
				 
			
 
				-def _match_commits_path(commit_file_changes, path):
			
 
				+def _match_commits_path(
			
 
				+    commit_file_changes: Sequence[CommitFileChange], path: str
			
 
				+) -> Sequence[Tuple[Commit, int]]:
			
 
				     # find commits that match the run time path the best.
			
 
				-    matching_commits = {}
			
 
				+    matching_commits: MutableMapping[int, Tuple[Commit, int]] = {}
			
 
				     best_score = 1
			
 
				     for file_change in commit_file_changes:
			
 
				         score = score_path_match_length(file_change.filename, path)
			
@@ -111,9 +129,22 @@ def _match_commits_path(commit_file_changes, path):
 
				     return list(matching_commits.values())
			
 
				 
			
 
				 
			
 
				-def _get_committers(annotated_frames, commits):
			
 
				+class AuthorCommits(TypedDict):
			
 
				+    author: Author
			
 
				+    commits: Sequence[Tuple[Commit, int]]
			
 
				+
			
 
				+
			
 
				+class AuthorCommitsSerialized(TypedDict):
			
 
				+    author: Author
			
 
				+    commits: Sequence[MutableMapping[str, Any]]
			
 
				+
			
 
				+
			
 
				+def _get_committers(
			
 
				+    annotated_frames: Sequence[MutableMapping[str, Any]],
			
 
				+    commits: Sequence[Tuple[Commit, int]],
			
 
				+) -> Sequence[AuthorCommits]:
			
 
				     # extract the unique committers and return their serialized sentry accounts
			
 
				-    committers = defaultdict(int)
			
 
				+    committers: MutableMapping[int, int] = defaultdict(int)
			
 
				 
			
 
				     limit = 5
			
 
				     for annotated_frame in annotated_frames:
			
@@ -128,23 +159,26 @@ def _get_committers(annotated_frames, commits):
 
				                 break
			
 
				 
			
 
				     # organize them by this heuristic (first frame is worth 5 points, second is worth 4, etc.)
			
 
				-    sorted_committers = sorted(committers, key=committers.get)
			
 
				-    users_by_author = get_users_for_commits([c for c, _ in commits])
			
 
				+    sorted_committers = sorted(committers.items(), key=operator.itemgetter(1))
			
 
				 
			
 
				-    user_dicts = [
			
 
				+    users_by_author: Mapping[str, Author] = get_users_for_commits([c for c, _ in commits])
			
 
				+
			
 
				+    user_dicts: Sequence[AuthorCommits] = [
			
 
				         {
			
 
				-            "author": users_by_author.get(str(author_id)),
			
 
				+            "author": users_by_author.get(str(author_id), {}),
			
 
				             "commits": [
			
 
				                 (commit, score) for (commit, score) in commits if commit.author_id == author_id
			
 
				             ],
			
 
				         }
			
 
				-        for author_id in sorted_committers
			
 
				+        for author_id, _ in sorted_committers
			
 
				     ]
			
 
				 
			
 
				     return user_dicts
			
 
				 
			
 
				 
			
 
				-def get_previous_releases(project, start_version, limit=5):
			
 
				+def get_previous_releases(
			
 
				+    project: Project, start_version: str, limit: int = 5
			
 
				+) -> Union[Any, Sequence[Release]]:
			
 
				     # given a release version + project, return the previous
			
 
				     # `limit` releases (includes the release specified by `version`)
			
 
				     key = "get_previous_releases:1:%s" % hash_values([project.id, start_version, limit])
			
@@ -193,7 +227,13 @@ def get_previous_releases(project, start_version, limit=5):
 
				     return rv
			
 
				 
			
 
				 
			
 
				-def get_event_file_committers(project, group_id, event_frames, event_platform, frame_limit=25):
			
 
				+def get_event_file_committers(
			
 
				+    project: Project,
			
 
				+    group_id: int,
			
 
				+    event_frames: Sequence[MutableMapping[str, Any]],
			
 
				+    event_platform: str,
			
 
				+    frame_limit: int = 25,
			
 
				+) -> Sequence[AuthorCommits]:
			
 
				     group = Group.objects.get_from_cache(id=group_id)
			
 
				 
			
 
				     first_release_version = group.get_first_release()
			
@@ -224,7 +264,7 @@ def get_event_file_committers(project, group_id, event_frames, event_platform, f
 
				         for frame in frames:
			
 
				             if frame.get("filename") is None:
			
 
				                 continue
			
 
				-            if "/" not in frame.get("filename") and frame.get("module"):
			
 
				+            if "/" not in str(frame.get("filename")) and frame.get("module"):
			
 
				                 # Replace the last module segment with the filename, as the
			
 
				                 # terminal element in a module path is the class
			
 
				                 module = frame["module"].split(".")
			
@@ -235,38 +275,44 @@ def get_event_file_committers(project, group_id, event_frames, event_platform, f
 
				     # XXX(dcramer): frames may not define a filepath. For example, in Java its common
			
 
				     # to only have a module name/path
			
 
				     path_set = {
			
 
				-        f for f in (frame.get("filename") or frame.get("abs_path") for frame in app_frames) if f
			
 
				+        str(f)
			
 
				+        for f in (frame.get("filename") or frame.get("abs_path") for frame in app_frames)
			
 
				+        if f
			
 
				     }
			
 
				 
			
 
				-    file_changes = []
			
 
				+    file_changes: Sequence[CommitFileChange] = []
			
 
				     if path_set:
			
 
				         file_changes = _get_commit_file_changes(commits, path_set)
			
 
				 
			
 
				-    commit_path_matches = {path: _match_commits_path(file_changes, path) for path in path_set}
			
 
				+    commit_path_matches: Mapping[str, Sequence[Tuple[Commit, int]]] = {
			
 
				+        path: _match_commits_path(file_changes, path) for path in path_set
			
 
				+    }
			
 
				 
			
 
				     annotated_frames = [
			
 
				         {
			
 
				             "frame": frame,
			
 
				-            "commits": commit_path_matches.get(frame.get("filename") or frame.get("abs_path"))
			
 
				+            "commits": commit_path_matches.get(str(frame.get("filename") or frame.get("abs_path")))
			
 
				             or [],
			
 
				         }
			
 
				         for frame in app_frames
			
 
				     ]
			
 
				 
			
 
				-    relevant_commits = list(
			
 
				-        {match for match in commit_path_matches for match in commit_path_matches[match]}
			
 
				+    relevant_commits: Sequence[Tuple[Commit, int]] = list(
			
 
				+        {match for matches in commit_path_matches.values() for match in matches}
			
 
				     )
			
 
				 
			
 
				     return _get_committers(annotated_frames, relevant_commits)
			
 
				 
			
 
				 
			
 
				-def get_serialized_event_file_committers(project, event, frame_limit=25):
			
 
				+def get_serialized_event_file_committers(
			
 
				+    project: Project, event: Event, frame_limit: int = 25
			
 
				+) -> Sequence[AuthorCommitsSerialized]:
			
 
				     event_frames = get_frame_paths(event.data)
			
 
				     committers = get_event_file_committers(
			
 
				         project, event.group_id, event_frames, event.platform, frame_limit=frame_limit
			
 
				     )
			
 
				     commits = [commit for committer in committers for commit in committer["commits"]]
			
 
				-    serialized_commits = serialize(
			
 
				+    serialized_commits: Sequence[MutableMapping[str, Any]] = serialize(
			
 
				         [c for (c, score) in commits], serializer=CommitSerializer(exclude=["author"])
			
 
				     )
			
 
				 
			
@@ -276,18 +322,23 @@ def get_serialized_event_file_committers(project, event, frame_limit=25):
 
				         serialized_commit["score"] = score
			
 
				         serialized_commits_by_id[commit.id] = serialized_commit
			
 
				 
			
 
				+    serialized_committers: List[AuthorCommitsSerialized] = []
			
 
				     for committer in committers:
			
 
				         commit_ids = [commit.id for (commit, _) in committer["commits"]]
			
 
				         commits_result = [serialized_commits_by_id[commit_id] for commit_id in commit_ids]
			
 
				-        committer["commits"] = dedupe_commits(commits_result)
			
 
				+        serialized_committers.append(
			
 
				+            {"author": committer["author"], "commits": dedupe_commits(commits_result)}
			
 
				+        )
			
 
				 
			
 
				     metrics.incr(
			
 
				         "feature.owners.has-committers",
			
 
				         instance="hit" if committers else "miss",
			
 
				         skip_internal=False,
			
 
				     )
			
 
				-    return committers
			
 
				+    return serialized_committers
			
 
				 
			
 
				 
			
 
				-def dedupe_commits(commits):
			
 
				+def dedupe_commits(
			
 
				+    commits: Sequence[MutableMapping[str, Any]]
			
 
				+) -> Sequence[MutableMapping[str, Any]]:
			
 
				     return list({c["id"]: c for c in commits}.values())
			
--- a/src/sentry/utils/safe.py
+++ b/src/sentry/utils/safe.py
@@ -1,5 +1,5 @@
 
				 import logging
			
 
				-from collections.abc import Mapping
			
 
				+from typing import Any, Mapping, Sequence, Union
			
 
				 
			
 
				 import sentry_sdk
			
 
				 from django.conf import settings
			
@@ -11,6 +11,8 @@ from sentry.utils import json
 
				 from sentry.utils.compat import filter
			
 
				 from sentry.utils.strings import truncatechars
			
 
				 
			
 
				+PathSearchable = Union[Mapping[str, Any], Sequence[Any]]
			
 
				+
			
 
				 
			
 
				 def safe_execute(func, *args, **kwargs):
			
 
				     # TODO: we should make smart savepoints (only executing the savepoint server
			
@@ -122,7 +124,7 @@ def trim_dict(value, max_items=settings.SENTRY_MAX_DICTIONARY_ITEMS, **kwargs):
 
				     return value
			
 
				 
			
 
				 
			
 
				-def get_path(data, *path, **kwargs):
			
 
				+def get_path(data: PathSearchable, *path, **kwargs):
			
 
				     """
			
 
				     Safely resolves data from a recursive data structure. A value is only
			
 
				     returned if the full path exists, otherwise ``None`` is returned.