Просмотр исходного кода

ref(typing): add type hints to suspect commit for clarity (#34263)

* add typing to committers.
Gilbert Szeto 2 лет назад
Родитель
Сommit
321676d744

+ 1 - 0
mypy.ini

@@ -88,6 +88,7 @@ files = src/sentry/analytics/,
         src/sentry/utils/appleconnect/,
         src/sentry/utils/avatar.py,
         src/sentry/utils/codecs.py,
+        src/sentry/utils/committers.py,
         src/sentry/utils/dates.py,
         src/sentry/utils/email/,
         src/sentry/utils/jwt.py,

+ 3 - 2
src/sentry/api/serializers/models/commit.py

@@ -1,11 +1,12 @@
 from collections import defaultdict
+from typing import Mapping
 
 from sentry.api.serializers import Serializer, register, serialize
-from sentry.api.serializers.models.release import CommitAuthor, get_users_for_authors
+from sentry.api.serializers.models.release import Author, CommitAuthor, get_users_for_authors
 from sentry.models import Commit, Repository
 
 
-def get_users_for_commits(item_list, user=None):
+def get_users_for_commits(item_list, user=None) -> Mapping[str, Author]:
     authors = list(
         CommitAuthor.objects.get_many_from_cache([i.author_id for i in item_list if i.author_id])
     )

+ 15 - 4
src/sentry/api/serializers/models/release.py

@@ -1,10 +1,12 @@
 from collections import defaultdict
+from typing import Mapping, Sequence, TypedDict, Union
 
 from django.core.cache import cache
 from django.db.models import Sum
 
 from sentry import release_health, tagstore
 from sentry.api.serializers import Serializer, register, serialize
+from sentry.api.serializers.models.user import UserSerializerResponse
 from sentry.db.models.query import in_iexact
 from sentry.models import (
     Commit,
@@ -51,7 +53,15 @@ def _user_to_author_cache_key(organization_id, author):
     return f"get_users_for_authors:{organization_id}:{author_hash}"
 
 
-def get_users_for_authors(organization_id, authors, user=None):
+class NonMappableUser(TypedDict):
+    name: str
+    email: str
+
+
+Author = Union[UserSerializerResponse, NonMappableUser]
+
+
+def get_users_for_authors(organization_id, authors, user=None) -> Mapping[str, Author]:
     """
     Returns a dictionary of author_id => user, if a Sentry
     user object exists for that email. If there is no matching
@@ -59,8 +69,9 @@ def get_users_for_authors(organization_id, authors, user=None):
     author is returned.
     e.g.
     {
-        1: serialized(<User id=1>),
-        2: {email: 'not-a-user@example.com', name: 'dunno'},
+        '<author-id-1>': serialized(<User id=1, ...>),
+        '<author-id-2>': {'email': 'not-a-user@example.com', 'name': 'dunno'},
+        '<author-id-3>': serialized(<User id=3, ...>),
         ...
     }
     """
@@ -93,7 +104,7 @@ def get_users_for_authors(organization_id, authors, user=None):
             is_active=True,
             sentry_orgmember_set__organization_id=organization_id,
         )
-        users = serialize(list(users), user)
+        users: Sequence[UserSerializerResponse] = serialize(list(users), user)
         users_by_id = {user["id"]: user for user in users}
         # Figure out which email address matches to a user
         users_by_email = {}

+ 1 - 1
src/sentry/notifications/utils/__init__.py

@@ -218,7 +218,7 @@ def get_commits(project: Project, event: Event) -> Sequence[Mapping[str, Any]]:
         for committer in committers:
             for commit in committer["commits"]:
                 if commit["id"] not in commits:
-                    commit_data = commit.copy()
+                    commit_data = dict(commit)
                     commit_data["shortId"] = commit_data["id"][:7]
                     commit_data["author"] = committer["author"]
                     commit_data["subject"] = commit_data["message"].split("\n", 1)[0]

+ 1 - 4
src/sentry/ownership/grammar.py

@@ -27,7 +27,7 @@ from rest_framework.serializers import ValidationError
 from sentry.eventstore.models import EventSubjectTemplateData
 from sentry.models import ActorTuple, RepositoryProjectPathConfig
 from sentry.utils.glob import glob_match
-from sentry.utils.safe import get_path
+from sentry.utils.safe import PathSearchable, get_path
 
 __all__ = ("parse_rules", "dump_schema", "load_schema")
 
@@ -75,9 +75,6 @@ _       = space*
 )
 
 
-PathSearchable = Union[Mapping[str, Any], Sequence[Any]]
-
-
 class Rule(namedtuple("Rule", "matcher owners")):
     """
     A Rule represents a single line in an Ownership file.

+ 1 - 1
src/sentry/tasks/groupowner.py

@@ -54,7 +54,7 @@ def _process_suspect_commits(
                 )
             owner_scores = {}
             for committer in committers:
-                if "id" in committer["author"]:
+                if committer["author"] and "id" in committer["author"]:
                     author_id = committer["author"]["id"]
                     for commit, score in committer["commits"]:
                         if score >= MIN_COMMIT_SCORE:

+ 83 - 32
src/sentry/utils/committers.py

@@ -1,22 +1,36 @@
 import operator
 from collections import defaultdict
 from functools import reduce
+from typing import (
+    Any,
+    Iterator,
+    List,
+    Mapping,
+    MutableMapping,
+    Sequence,
+    Set,
+    Tuple,
+    TypedDict,
+    Union,
+)
 
 from django.core.cache import cache
 from django.db.models import Q
 
 from sentry.api.serializers import serialize
 from sentry.api.serializers.models.commit import CommitSerializer, get_users_for_commits
-from sentry.models import Commit, CommitFileChange, Group, Release, ReleaseCommit
+from sentry.api.serializers.models.release import Author
+from sentry.eventstore.models import Event
+from sentry.models import Commit, CommitFileChange, Group, Project, Release, ReleaseCommit
 from sentry.utils import metrics
 from sentry.utils.compat import zip
 from sentry.utils.hashlib import hash_values
-from sentry.utils.safe import get_path
+from sentry.utils.safe import PathSearchable, get_path
 
 PATH_SEPARATORS = frozenset(["/", "\\"])
 
 
-def tokenize_path(path):
+def tokenize_path(path: str) -> Iterator[str]:
     for sep in PATH_SEPARATORS:
         if sep in path:
             # Exclude empty path segments as some repository integrations
@@ -26,7 +40,7 @@ def tokenize_path(path):
         return iter([path])
 
 
-def score_path_match_length(path_a, path_b):
+def score_path_match_length(path_a: str, path_b: str) -> int:
     score = 0
     for a, b in zip(tokenize_path(path_a), tokenize_path(path_b)):
         if a.lower() != b.lower():
@@ -35,7 +49,7 @@ def score_path_match_length(path_a, path_b):
     return score
 
 
-def get_frame_paths(data):
+def get_frame_paths(data: PathSearchable) -> Union[Any, Sequence[Any]]:
     frames = get_path(data, "stacktrace", "frames", filter=True)
     if frames:
         return frames
@@ -43,11 +57,11 @@ def get_frame_paths(data):
     return get_path(data, "exception", "values", 0, "stacktrace", "frames", filter=True) or []
 
 
-def release_cache_key(release):
+def release_cache_key(release: Release) -> str:
     return f"release_commits:{release.id}"
 
 
-def _get_commits(releases):
+def _get_commits(releases: Sequence[Release]) -> Sequence[Commit]:
     commits = []
 
     fetched = cache.get_many([release_cache_key(release) for release in releases])
@@ -60,7 +74,7 @@ def _get_commits(releases):
             else:
                 commits += [c for c in cached_commits if c not in commits]
     else:
-        missed = releases
+        missed = list(releases)
 
     if missed:
         release_commits = ReleaseCommit.objects.filter(release__in=missed).select_related(
@@ -76,7 +90,9 @@ def _get_commits(releases):
     return commits
 
 
-def _get_commit_file_changes(commits, path_name_set):
+def _get_commit_file_changes(
+    commits: Sequence[Commit], path_name_set: Set[str]
+) -> Sequence[CommitFileChange]:
     # Get distinct file names and bail if there are no files.
     filenames = {next(tokenize_path(path), None) for path in path_name_set}
     filenames = {path for path in filenames if path is not None}
@@ -91,9 +107,11 @@ def _get_commit_file_changes(commits, path_name_set):
     return list(commit_file_change_matches)
 
 
-def _match_commits_path(commit_file_changes, path):
+def _match_commits_path(
+    commit_file_changes: Sequence[CommitFileChange], path: str
+) -> Sequence[Tuple[Commit, int]]:
     # find commits that match the run time path the best.
-    matching_commits = {}
+    matching_commits: MutableMapping[int, Tuple[Commit, int]] = {}
     best_score = 1
     for file_change in commit_file_changes:
         score = score_path_match_length(file_change.filename, path)
@@ -111,9 +129,22 @@ def _match_commits_path(commit_file_changes, path):
     return list(matching_commits.values())
 
 
-def _get_committers(annotated_frames, commits):
+class AuthorCommits(TypedDict):
+    author: Author
+    commits: Sequence[Tuple[Commit, int]]
+
+
+class AuthorCommitsSerialized(TypedDict):
+    author: Author
+    commits: Sequence[MutableMapping[str, Any]]
+
+
+def _get_committers(
+    annotated_frames: Sequence[MutableMapping[str, Any]],
+    commits: Sequence[Tuple[Commit, int]],
+) -> Sequence[AuthorCommits]:
     # extract the unique committers and return their serialized sentry accounts
-    committers = defaultdict(int)
+    committers: MutableMapping[int, int] = defaultdict(int)
 
     limit = 5
     for annotated_frame in annotated_frames:
@@ -128,23 +159,26 @@ def _get_committers(annotated_frames, commits):
                 break
 
     # organize them by this heuristic (first frame is worth 5 points, second is worth 4, etc.)
-    sorted_committers = sorted(committers, key=committers.get)
-    users_by_author = get_users_for_commits([c for c, _ in commits])
+    sorted_committers = sorted(committers.items(), key=operator.itemgetter(1))
 
-    user_dicts = [
+    users_by_author: Mapping[str, Author] = get_users_for_commits([c for c, _ in commits])
+
+    user_dicts: Sequence[AuthorCommits] = [
         {
-            "author": users_by_author.get(str(author_id)),
+            "author": users_by_author.get(str(author_id), {}),
             "commits": [
                 (commit, score) for (commit, score) in commits if commit.author_id == author_id
             ],
         }
-        for author_id in sorted_committers
+        for author_id, _ in sorted_committers
     ]
 
     return user_dicts
 
 
-def get_previous_releases(project, start_version, limit=5):
+def get_previous_releases(
+    project: Project, start_version: str, limit: int = 5
+) -> Union[Any, Sequence[Release]]:
     # given a release version + project, return the previous
     # `limit` releases (includes the release specified by `version`)
     key = "get_previous_releases:1:%s" % hash_values([project.id, start_version, limit])
@@ -193,7 +227,13 @@ def get_previous_releases(project, start_version, limit=5):
     return rv
 
 
-def get_event_file_committers(project, group_id, event_frames, event_platform, frame_limit=25):
+def get_event_file_committers(
+    project: Project,
+    group_id: int,
+    event_frames: Sequence[MutableMapping[str, Any]],
+    event_platform: str,
+    frame_limit: int = 25,
+) -> Sequence[AuthorCommits]:
     group = Group.objects.get_from_cache(id=group_id)
 
     first_release_version = group.get_first_release()
@@ -224,7 +264,7 @@ def get_event_file_committers(project, group_id, event_frames, event_platform, f
         for frame in frames:
             if frame.get("filename") is None:
                 continue
-            if "/" not in frame.get("filename") and frame.get("module"):
+            if "/" not in str(frame.get("filename")) and frame.get("module"):
                 # Replace the last module segment with the filename, as the
                 # terminal element in a module path is the class
                 module = frame["module"].split(".")
@@ -235,38 +275,44 @@ def get_event_file_committers(project, group_id, event_frames, event_platform, f
     # XXX(dcramer): frames may not define a filepath. For example, in Java its common
     # to only have a module name/path
     path_set = {
-        f for f in (frame.get("filename") or frame.get("abs_path") for frame in app_frames) if f
+        str(f)
+        for f in (frame.get("filename") or frame.get("abs_path") for frame in app_frames)
+        if f
     }
 
-    file_changes = []
+    file_changes: Sequence[CommitFileChange] = []
     if path_set:
         file_changes = _get_commit_file_changes(commits, path_set)
 
-    commit_path_matches = {path: _match_commits_path(file_changes, path) for path in path_set}
+    commit_path_matches: Mapping[str, Sequence[Tuple[Commit, int]]] = {
+        path: _match_commits_path(file_changes, path) for path in path_set
+    }
 
     annotated_frames = [
         {
             "frame": frame,
-            "commits": commit_path_matches.get(frame.get("filename") or frame.get("abs_path"))
+            "commits": commit_path_matches.get(str(frame.get("filename") or frame.get("abs_path")))
             or [],
         }
         for frame in app_frames
     ]
 
-    relevant_commits = list(
-        {match for match in commit_path_matches for match in commit_path_matches[match]}
+    relevant_commits: Sequence[Tuple[Commit, int]] = list(
+        {match for matches in commit_path_matches.values() for match in matches}
     )
 
     return _get_committers(annotated_frames, relevant_commits)
 
 
-def get_serialized_event_file_committers(project, event, frame_limit=25):
+def get_serialized_event_file_committers(
+    project: Project, event: Event, frame_limit: int = 25
+) -> Sequence[AuthorCommitsSerialized]:
     event_frames = get_frame_paths(event.data)
     committers = get_event_file_committers(
         project, event.group_id, event_frames, event.platform, frame_limit=frame_limit
     )
     commits = [commit for committer in committers for commit in committer["commits"]]
-    serialized_commits = serialize(
+    serialized_commits: Sequence[MutableMapping[str, Any]] = serialize(
         [c for (c, score) in commits], serializer=CommitSerializer(exclude=["author"])
     )
 
@@ -276,18 +322,23 @@ def get_serialized_event_file_committers(project, event, frame_limit=25):
         serialized_commit["score"] = score
         serialized_commits_by_id[commit.id] = serialized_commit
 
+    serialized_committers: List[AuthorCommitsSerialized] = []
     for committer in committers:
         commit_ids = [commit.id for (commit, _) in committer["commits"]]
         commits_result = [serialized_commits_by_id[commit_id] for commit_id in commit_ids]
-        committer["commits"] = dedupe_commits(commits_result)
+        serialized_committers.append(
+            {"author": committer["author"], "commits": dedupe_commits(commits_result)}
+        )
 
     metrics.incr(
         "feature.owners.has-committers",
         instance="hit" if committers else "miss",
         skip_internal=False,
     )
-    return committers
+    return serialized_committers
 
 
-def dedupe_commits(commits):
+def dedupe_commits(
+    commits: Sequence[MutableMapping[str, Any]]
+) -> Sequence[MutableMapping[str, Any]]:
     return list({c["id"]: c for c in commits}.values())

+ 4 - 2
src/sentry/utils/safe.py

@@ -1,5 +1,5 @@
 import logging
-from collections.abc import Mapping
+from typing import Any, Mapping, Sequence, Union
 
 import sentry_sdk
 from django.conf import settings
@@ -11,6 +11,8 @@ from sentry.utils import json
 from sentry.utils.compat import filter
 from sentry.utils.strings import truncatechars
 
+PathSearchable = Union[Mapping[str, Any], Sequence[Any]]
+
 
 def safe_execute(func, *args, **kwargs):
     # TODO: we should make smart savepoints (only executing the savepoint server
@@ -122,7 +124,7 @@ def trim_dict(value, max_items=settings.SENTRY_MAX_DICTIONARY_ITEMS, **kwargs):
     return value
 
 
-def get_path(data, *path, **kwargs):
+def get_path(data: PathSearchable, *path, **kwargs):
     """
     Safely resolves data from a recursive data structure. A value is only
     returned if the full path exists, otherwise ``None`` is returned.