@@ -1,22 +1,36 @@
 import operator
 from collections import defaultdict
 from functools import reduce
+from typing import (
+    Any,
+    Iterator,
+    List,
+    Mapping,
+    MutableMapping,
+    Sequence,
+    Set,
+    Tuple,
+    TypedDict,
+    Union,
+)

 from django.core.cache import cache
 from django.db.models import Q

 from sentry.api.serializers import serialize
 from sentry.api.serializers.models.commit import CommitSerializer, get_users_for_commits
-from sentry.models import Commit, CommitFileChange, Group, Release, ReleaseCommit
+from sentry.api.serializers.models.release import Author
+from sentry.eventstore.models import Event
+from sentry.models import Commit, CommitFileChange, Group, Project, Release, ReleaseCommit
 from sentry.utils import metrics
 from sentry.utils.compat import zip
 from sentry.utils.hashlib import hash_values
-from sentry.utils.safe import get_path
+from sentry.utils.safe import PathSearchable, get_path

 PATH_SEPARATORS = frozenset(["/", "\\"])


-def tokenize_path(path):
+def tokenize_path(path: str) -> Iterator[str]:
     for sep in PATH_SEPARATORS:
         if sep in path:
             # Exclude empty path segments as some repository integrations
@@ -26,7 +40,7 @@ def tokenize_path(path):
         return iter([path])


-def score_path_match_length(path_a, path_b):
+def score_path_match_length(path_a: str, path_b: str) -> int:
     score = 0
     for a, b in zip(tokenize_path(path_a), tokenize_path(path_b)):
         if a.lower() != b.lower():
@@ -35,7 +49,7 @@ def score_path_match_length(path_a, path_b):
     return score


-def get_frame_paths(data):
+def get_frame_paths(data: PathSearchable) -> Union[Any, Sequence[Any]]:
     frames = get_path(data, "stacktrace", "frames", filter=True)
     if frames:
         return frames
@@ -43,11 +57,11 @@ def get_frame_paths(data):
     return get_path(data, "exception", "values", 0, "stacktrace", "frames", filter=True) or []


-def release_cache_key(release):
+def release_cache_key(release: Release) -> str:
     return f"release_commits:{release.id}"


-def _get_commits(releases):
+def _get_commits(releases: Sequence[Release]) -> Sequence[Commit]:
     commits = []

     fetched = cache.get_many([release_cache_key(release) for release in releases])
@@ -60,7 +74,7 @@ def _get_commits(releases):
             else:
                 commits += [c for c in cached_commits if c not in commits]
     else:
-        missed = releases
+        missed = list(releases)

     if missed:
         release_commits = ReleaseCommit.objects.filter(release__in=missed).select_related(
@@ -76,7 +90,9 @@ def _get_commits(releases):
     return commits


-def _get_commit_file_changes(commits, path_name_set):
+def _get_commit_file_changes(
+    commits: Sequence[Commit], path_name_set: Set[str]
+) -> Sequence[CommitFileChange]:
     # Get distinct file names and bail if there are no files.
     filenames = {next(tokenize_path(path), None) for path in path_name_set}
     filenames = {path for path in filenames if path is not None}
@@ -91,9 +107,11 @@ def _get_commit_file_changes(commits, path_name_set):
     return list(commit_file_change_matches)


-def _match_commits_path(commit_file_changes, path):
+def _match_commits_path(
+    commit_file_changes: Sequence[CommitFileChange], path: str
+) -> Sequence[Tuple[Commit, int]]:
     # find commits that match the run time path the best.
-    matching_commits = {}
+    matching_commits: MutableMapping[int, Tuple[Commit, int]] = {}
     best_score = 1
     for file_change in commit_file_changes:
         score = score_path_match_length(file_change.filename, path)
@@ -111,9 +129,22 @@ def _match_commits_path(commit_file_changes, path):
     return list(matching_commits.values())


-def _get_committers(annotated_frames, commits):
+class AuthorCommits(TypedDict):
+    author: Author
+    commits: Sequence[Tuple[Commit, int]]
+
+
+class AuthorCommitsSerialized(TypedDict):
+    author: Author
+    commits: Sequence[MutableMapping[str, Any]]
+
+
+def _get_committers(
+    annotated_frames: Sequence[MutableMapping[str, Any]],
+    commits: Sequence[Tuple[Commit, int]],
+) -> Sequence[AuthorCommits]:
     # extract the unique committers and return their serialized sentry accounts
-    committers = defaultdict(int)
+    committers: MutableMapping[int, int] = defaultdict(int)

     limit = 5
     for annotated_frame in annotated_frames:
@@ -128,23 +159,26 @@ def _get_committers(annotated_frames, commits):
                 break

     # organize them by this heuristic (first frame is worth 5 points, second is worth 4, etc.)
-    sorted_committers = sorted(committers, key=committers.get)
-    users_by_author = get_users_for_commits([c for c, _ in commits])
+    sorted_committers = sorted(committers.items(), key=operator.itemgetter(1))

-    user_dicts = [
+    users_by_author: Mapping[str, Author] = get_users_for_commits([c for c, _ in commits])
+
+    user_dicts: Sequence[AuthorCommits] = [
         {
-            "author": users_by_author.get(str(author_id)),
+            "author": users_by_author.get(str(author_id), {}),
             "commits": [
                 (commit, score) for (commit, score) in commits if commit.author_id == author_id
             ],
         }
-        for author_id in sorted_committers
+        for author_id, _ in sorted_committers
     ]

     return user_dicts


-def get_previous_releases(project, start_version, limit=5):
+def get_previous_releases(
+    project: Project, start_version: str, limit: int = 5
+) -> Union[Any, Sequence[Release]]:
     # given a release version + project, return the previous
     # `limit` releases (includes the release specified by `version`)
     key = "get_previous_releases:1:%s" % hash_values([project.id, start_version, limit])
@@ -193,7 +227,13 @@ def get_previous_releases(project, start_version, limit=5):
     return rv


-def get_event_file_committers(project, group_id, event_frames, event_platform, frame_limit=25):
+def get_event_file_committers(
+    project: Project,
+    group_id: int,
+    event_frames: Sequence[MutableMapping[str, Any]],
+    event_platform: str,
+    frame_limit: int = 25,
+) -> Sequence[AuthorCommits]:
     group = Group.objects.get_from_cache(id=group_id)

     first_release_version = group.get_first_release()
@@ -224,7 +264,7 @@ def get_event_file_committers(project, group_id, event_frames, event_platform, f
         for frame in frames:
             if frame.get("filename") is None:
                 continue
-            if "/" not in frame.get("filename") and frame.get("module"):
+            if "/" not in str(frame.get("filename")) and frame.get("module"):
                 # Replace the last module segment with the filename, as the
                 # terminal element in a module path is the class
                 module = frame["module"].split(".")
@@ -235,38 +275,44 @@ def get_event_file_committers(project, group_id, event_frames, event_platform, f
     # XXX(dcramer): frames may not define a filepath. For example, in Java its common
     # to only have a module name/path
     path_set = {
-        f for f in (frame.get("filename") or frame.get("abs_path") for frame in app_frames) if f
+        str(f)
+        for f in (frame.get("filename") or frame.get("abs_path") for frame in app_frames)
+        if f
     }

-    file_changes = []
+    file_changes: Sequence[CommitFileChange] = []
     if path_set:
         file_changes = _get_commit_file_changes(commits, path_set)

-    commit_path_matches = {path: _match_commits_path(file_changes, path) for path in path_set}
+    commit_path_matches: Mapping[str, Sequence[Tuple[Commit, int]]] = {
+        path: _match_commits_path(file_changes, path) for path in path_set
+    }

     annotated_frames = [
         {
             "frame": frame,
-            "commits": commit_path_matches.get(frame.get("filename") or frame.get("abs_path"))
+            "commits": commit_path_matches.get(str(frame.get("filename") or frame.get("abs_path")))
             or [],
         }
         for frame in app_frames
     ]

-    relevant_commits = list(
-        {match for match in commit_path_matches for match in commit_path_matches[match]}
+    relevant_commits: Sequence[Tuple[Commit, int]] = list(
+        {match for matches in commit_path_matches.values() for match in matches}
     )

     return _get_committers(annotated_frames, relevant_commits)


-def get_serialized_event_file_committers(project, event, frame_limit=25):
+def get_serialized_event_file_committers(
+    project: Project, event: Event, frame_limit: int = 25
+) -> Sequence[AuthorCommitsSerialized]:
     event_frames = get_frame_paths(event.data)
     committers = get_event_file_committers(
         project, event.group_id, event_frames, event.platform, frame_limit=frame_limit
     )
     commits = [commit for committer in committers for commit in committer["commits"]]
-    serialized_commits = serialize(
+    serialized_commits: Sequence[MutableMapping[str, Any]] = serialize(
         [c for (c, score) in commits], serializer=CommitSerializer(exclude=["author"])
     )

@@ -276,18 +322,23 @@ def get_serialized_event_file_committers(project, event, frame_limit=25):
         serialized_commit["score"] = score
         serialized_commits_by_id[commit.id] = serialized_commit

+    serialized_committers: List[AuthorCommitsSerialized] = []
     for committer in committers:
         commit_ids = [commit.id for (commit, _) in committer["commits"]]
         commits_result = [serialized_commits_by_id[commit_id] for commit_id in commit_ids]
-        committer["commits"] = dedupe_commits(commits_result)
+        serialized_committers.append(
+            {"author": committer["author"], "commits": dedupe_commits(commits_result)}
+        )

     metrics.incr(
         "feature.owners.has-committers",
         instance="hit" if committers else "miss",
         skip_internal=False,
     )
-    return committers
+    return serialized_committers


-def dedupe_commits(commits):
+def dedupe_commits(
+    commits: Sequence[MutableMapping[str, Any]]
+) -> Sequence[MutableMapping[str, Any]]:
     return list({c["id"]: c for c in commits}.values())
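
Illustrative sketch (not part of the patch): the rewritten committer ordering sorts (author_id, score) pairs via operator.itemgetter(1), which keeps the old ascending-by-score order of sorted(committers, key=committers.get) while being friendlier to type checking, and dedupe_commits keeps one serialized commit per id, with the last duplicate winning. The data below is made up purely for demonstration.

    import operator
    from collections import defaultdict

    # Hypothetical frame scores: author 1 was blamed twice, author 2 once.
    committers = defaultdict(int)
    committers[1] += 2
    committers[2] += 1

    # Sort (author_id, score) pairs by score, lowest first, as the new code does.
    sorted_committers = sorted(committers.items(), key=operator.itemgetter(1))
    assert sorted_committers == [(2, 1), (1, 2)]

    # dedupe_commits, as defined in the diff, keyed on the serialized commit id.
    def dedupe_commits(commits):
        return list({c["id"]: c for c in commits}.values())

    assert dedupe_commits(
        [{"id": "a", "score": 1}, {"id": "a", "score": 3}, {"id": "b", "score": 2}]
    ) == [{"id": "a", "score": 3}, {"id": "b", "score": 2}]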
|