Browse Source

chore(hyrbid-cloud): Support smaller model manifest file, support counting stable tests (#41185)

1. Allow for a new script that can count stable marked tests by model
and total
2. Reduce size of the model manifest file such that it can be committed
and share via git.

There will be a second PR on the getsentry side that gets the model
manifest for that side in.
Zach Collins 2 years ago
parent
commit
2c0aa94fb6

+ 43 - 0
bin/count-hybrid-cloud-test-progress

@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+import os
+import pathlib
+from collections import defaultdict
+
+from sentry.runner import configure
+
+configure()
+from sentry.testutils.modelmanifest import ModelManifest
+
+_manifest_files_env = {
+    "SENTRY_MODEL_MANIFEST_FILE_PATH": pathlib.Path(__file__).absolute().parent.parent,
+    "GETSENTRY_MODEL_MANIFEST_FILE_PATH": pathlib.Path(__file__)
+    .absolute()
+    .parent.parent.parent.joinpath("getsentry"),
+}
+
+
+for manifest_file_env_name, root_path in _manifest_files_env.items():
+    manifest_path = os.environ[manifest_file_env_name]
+    print(f"For {manifest_path}:")  # noqa
+    manifest = ModelManifest.open(file_path=manifest_path)
+
+    all_stable = set()
+    by_model_counts = defaultdict(lambda: [0, 0])
+    totals = [0, 0]
+
+    for test_id, test_visitor in manifest.each_hybrid_cloud_test(root_path):
+        test_visitor.load()
+        if test_visitor.decorator_was_stable:
+            totals[0] += 1
+        totals[1] += 1
+
+        for model_id in manifest.connections[test_id]:
+            model_name = manifest.reverse_lookup[model_id]
+            if test_visitor.decorator_was_stable:
+                by_model_counts[model_name][0] += 1
+            by_model_counts[model_name][1] += 1
+
+    for model_name, counts in by_model_counts.items():
+        print(f"\t{model_name}: {counts[0]}/{counts[1]}")  # noqa
+
+    print(f"Total: {totals[0]}/{totals[1]}")  # noqa

+ 76 - 138
bin/decorate-silo-mode-tests

@@ -1,16 +1,16 @@
 #!/usr/bin/env python
-import ast
 import os
 import os.path
 import pathlib
 import shutil
 import tempfile
-from typing import Union
+from typing import Iterator
 
 import click
 
-from sentry.utils import json
-from sentry.utils.types import Any
+from sentry.runner import configure
+
+configure()
 
 _manifest_files_env = {
     "SENTRY_MODEL_MANIFEST_FILE_PATH": pathlib.Path(__file__).absolute().parent.parent,
@@ -20,14 +20,20 @@ _manifest_files_env = {
 }
 
 
-def find_test_cases_matching(model_name: str):
-    for env, path_root in _manifest_files_env.items():
-        manifest = json.loads(open(os.environ[env]).read())
-        for test_node_id, hits in manifest.items():
-            hit_set = {list(v.keys())[0] for v in hits}
-            if model_name in hit_set:
-                parts = test_node_id.split("::")
-                yield str(path_root.joinpath(parts[0])), parts[1]
+from sentry.testutils.modelmanifest import HybridCloudTestVisitor, ModelManifest
+
+
+def find_test_cases_matching(model_name: str) -> Iterator[HybridCloudTestVisitor]:
+    for env_name, path_root in _manifest_files_env.items():
+        manifest = ModelManifest.open(os.environ[env_name])
+        model_test_ids = manifest.connections[
+            manifest.get_or_create_id(manifest.model_names, model_name)
+        ]
+        for test_id, test_visitor in manifest.each_hybrid_cloud_test(path_root):
+            if test_id not in model_test_ids:
+                continue
+            test_visitor.load()
+            yield test_visitor
 
 
 def pick(prompt, options):
@@ -44,147 +50,79 @@ def main(target_model: str):
     Script to decorate the given target test for silo tests, making it easier to deploy changes to given tests.
     """
 
-    for file_name, test_case_name in find_test_cases_matching(target_model):
-        print(f"Trying {test_case_name} in {file_name}")  # noqa
-        test_case_name = test_case_name.split("[")[
-            0
-        ]  # remove any parameterization off the test case
-        file_path = os.path.abspath(file_name)
-        file_ast = ast.parse(open(file_path).read())
-        test_visitor = TestVisitor(test_case_name)
-        test_visitor.visit(file_ast)
+    for test_visitor in find_test_cases_matching(target_model):
+        print(f"Trying {test_visitor.test_name} in {test_visitor.test_file_path}")  # noqa
 
         if not test_visitor.decorator_was_stable:
-            print(f"Updating {test_case_name}")  # noqa
+            print(f"Found {test_visitor.test_name}")  # noqa
+            try:
+                if pick("Skip this test?", ["y", "n"]) == "y":
+                    continue
+            except EOFError:
+                continue
+
             try:
                 silo_mode = pick("silo mode?", ["all", "no", "control", "region"])
                 set_stable = pick("set stable?", ["y", "n"]) == "y"
             except EOFError:
                 continue
-            test_visitor.rewrite(f"{silo_mode}_silo_test", set_stable, file_path)
-            return
-
-
-class TestVisitor(ast.NodeVisitor):
-    def __init__(self, target_symbol_path: str):
-        self.target_symbol_parts = target_symbol_path.split(".")
-        self.import_match_line = False
-        self.decorator_match_line = None
-        self.func_match_line = None
-        self.class_node = None
-        self.decorator_was_stable = False
-
-    def visit_ImportFrom(self, node: ast.ImportFrom) -> Any:
-        if node.module == "sentry.testutils.silo":
-            for name in node.names:
-                if isinstance(name, ast.alias):
-                    if name.name.endswith("_silo_test"):
-                        self.import_match_line = (node.lineno, node.col_offset)
-
-    def visit_ClassDef(self, node: ast.ClassDef) -> Any:
-        if len(self.target_symbol_parts) == 2 and self.target_symbol_parts[0] == node.name:
-            self.class_node = node
-            self.generic_visit(node)
-            self.class_node = None
-        elif len(self.target_symbol_parts) == 1:
-            if self.target_symbol_parts[-1] == node.name or self.target_symbol_parts[-1] in {
-                e.id for e in node.bases if isinstance(e, ast.Name)
-            }:
-                self.mark_target(node)
-
-    def visit_FunctionDef(self, node: Union[ast.FunctionDef, ast.ClassDef]) -> Any:
-        if self.target_symbol_parts[-1] == node.name:
-            if self.class_node:
-                node = self.class_node
-            elif len(self.target_symbol_parts) != 1:
-                return
-
-            self.mark_target(node)
-            return
-
-        return self.generic_visit(node)
-
-    def mark_target(self, node: Union[ast.FunctionDef, ast.ClassDef]):
-        self.func_match_line = (node.lineno, node.col_offset)
-        for expr in node.decorator_list:
-            decorator_visitor = DecoratorVisitor()
-            decorator_visitor.visit(expr)
-            if decorator_visitor.match_line:
-                self.decorator_match_line = decorator_visitor.match_line
-                self.decorator_was_stable = decorator_visitor.stable
-                break
-
-    def _decorate(self, target_test_silo_mode, set_stable, lineno, match_line):
-        if not match_line:
-            return False
-
-        if not match_line[0] == lineno:
-            return False
-
-        ws = b" " * match_line[1]
-        if set_stable:
-            return ws + f"@{target_test_silo_mode}(stable=True)\n".encode()
-        else:
-            return ws + f"@{target_test_silo_mode}\n".encode()
-
-    def rewrite(self, target_test_silo_mode, set_stable, path):
-        import_line = f"from sentry.testutils.silo import {target_test_silo_mode}\n".encode()
-        if not self.decorator_match_line and not self.func_match_line:
-            raise Exception(f"Could not find test case {self.target_symbol_parts}!")
-
-        with tempfile.NamedTemporaryFile(delete=False) as tf:
-            with open(path) as f:
-                if not self.import_match_line:
-                    tf.write(import_line)
+            _rewrite(test_visitor, f"{silo_mode}_silo_test", set_stable)
 
-                for i, line in enumerate(f.readlines()):
-                    i += 1
-
-                    if self.import_match_line and self.import_match_line[0] == i:
-                        tf.write(import_line)
-                        continue
-
-                    if newline := self._decorate(
-                        target_test_silo_mode, set_stable, i, self.decorator_match_line
-                    ):
-                        # If the decorator type is not changing, keep the original line.
-                        # If the decorator that existed was stable, don't replace it, keep it.
-                        if self.decorator_was_stable:
-                            tf.write(line.encode("utf8"))
-                        else:
-                            tf.write(newline)
-                        continue
-
-                    if not self.decorator_match_line and (
-                        newline := self._decorate(
-                            target_test_silo_mode, set_stable, i, self.func_match_line
-                        )
-                    ):
-                        tf.write(newline)
 
-                    tf.write(line.encode("utf8"))
+def _decorate(target_test_silo_mode, set_stable, lineno, match_line):
+    if not match_line:
+        return False
+
+    if not match_line[0] == lineno:
+        return False
+
+    ws = b" " * match_line[1]
+    if set_stable:
+        return ws + f"@{target_test_silo_mode}(stable=True)\n".encode()
+    else:
+        return ws + f"@{target_test_silo_mode}\n".encode()
 
-            tf.close()
-            shutil.move(tf.name, path)
 
+def _rewrite(test_visitor: HybridCloudTestVisitor, target_test_silo_mode, set_stable):
+    path = test_visitor.test_file_path
+    import_line = f"from sentry.testutils.silo import {target_test_silo_mode}\n".encode()
+    if not test_visitor.decorator_match_line and not test_visitor.func_match_line:
+        raise Exception(f"Could not find test case {test_visitor.target_symbol_parts}!")
 
-class DecoratorVisitor(ast.NodeVisitor):
-    def __init__(self):
-        self.match_line = None
-        self.stable = False
+    with tempfile.NamedTemporaryFile(delete=False) as tf:
+        with open(path) as f:
+            if not test_visitor.import_match_line:
+                tf.write(import_line)
+
+            for i, line in enumerate(f.readlines()):
+                i += 1
+
+                if test_visitor.import_match_line and test_visitor.import_match_line[0] == i:
+                    tf.write(import_line)
+                    continue
+
+                if newline := _decorate(
+                    target_test_silo_mode, set_stable, i, test_visitor.decorator_match_line
+                ):
+                    # If the decorator type is not changing, keep the original line.
+                    # If the decorator that existed was stable, don't replace it, keep it.
+                    if test_visitor.decorator_was_stable:
+                        tf.write(line.encode("utf8"))
+                    else:
+                        tf.write(newline)
+                    continue
 
-    def visit_keyword(self, node: ast.keyword) -> Any:
-        if node.arg == "stable":
-            if isinstance(node.value, ast.Constant):
-                self.stable = node.value.value
+                if not test_visitor.decorator_match_line and (
+                    newline := _decorate(
+                        target_test_silo_mode, set_stable, i, test_visitor.func_match_line
+                    )
+                ):
+                    tf.write(newline)
 
-    def visit_Name(self, node: ast.Name) -> Any:
-        if node.id.endswith("_silo_test"):
-            self.match_line = (node.lineno, node.col_offset - 1)
-        return ast.NodeVisitor.generic_visit(self, node)
+                tf.write(line.encode("utf8"))
 
-    def visit_Attribute(self, node: ast.Attribute) -> Any:
-        pass
+        tf.close()
+        shutil.move(tf.name, path)
 
 
 if __name__ == "__main__":

File diff suppressed because it is too large
+ 0 - 0
model-manifest.json


+ 175 - 44
src/sentry/testutils/modelmanifest.py

@@ -1,9 +1,11 @@
 from __future__ import annotations
 
+import ast
 import os
 from collections import defaultdict
 from contextlib import ExitStack, contextmanager
-from typing import Any, Collection, Dict, Generator, Iterable, Set, Type
+from pathlib import Path
+from typing import Any, Dict, Generator, Iterable, Iterator, MutableMapping, Set, Tuple, Type, Union
 
 import django.apps
 
@@ -15,69 +17,88 @@ from sentry.utils import json
 class ModelManifest:
     """For auditing which models are touched by each test case."""
 
+    file_path: str
+    connections: MutableMapping[int, Set[int]]
+    model_names: MutableMapping[str, int]
+    test_names: MutableMapping[str, int]
+    reverse_lookup: MutableMapping[int, str]
+    next_id: int
+
+    def get_or_create_id(self, cache: MutableMapping[str, int], name: str) -> int:
+        if name in cache:
+            return cache[name]
+        next_id = self.next_id
+        cache[name] = next_id
+        self.reverse_lookup[next_id] = name
+        self.next_id += 1
+        return next_id
+
     class Entry:
+        hits: set[Type[Model]]
+
         def __init__(self) -> None:
-            self.hits: Dict[Type[Model], Set[ModelManagerTriggerCondition]] = defaultdict(set)
+            self.hits: Set[Type[Model]] = set()
 
         def create_trigger_action(
             self, condition: ModelManagerTriggerCondition
         ) -> ModelManagerTriggerAction:
             def action(model_class: Type[Model]) -> None:
-                self.hits[model_class].add(condition)
+                self.hits.add(model_class)
 
             return action
 
     def __init__(self, file_path: str) -> None:
         self.file_path = file_path
-        self.tests: Dict[str, Collection[ModelManifest.Entry]] = {}
-
-    def _load_json(self, content: Any) -> None:
-        models = {model.__qualname__: model for model in django.apps.apps.get_models()}
-        conditions = {condition.name: condition for condition in ModelManagerTriggerCondition}
-
-        entry_objects = []
-
-        for (test_id, entry_inputs) in content.items():
-            entry_objects.append(entry_obj := ModelManifest.Entry())
-
-            for entry_input in entry_inputs:
-                for (model_name, condition_names) in entry_input.items():
-                    model_class = models[model_name]
-                    for condition_name in condition_names:
-                        condition = conditions[condition_name]
-                        entry_obj.hits[model_class].add(condition)
-
-            self.tests[test_id] = entry_objects
-
-    def _to_json(self) -> Dict[str, Any]:
-        return {
-            test_id: [
-                {
-                    model_class.__qualname__: [condition.name for condition in conditions]
-                    for (model_class, conditions) in entry.hits.items()
-                }
-                for entry in entries
-                if entry.hits
-            ]
-            for (test_id, entries) in self.tests.items()
-        }
+        self.connections = defaultdict(set)
+        self.model_names = {}
+        self.test_names = {}
+        self.reverse_lookup = {}
+        self.next_id = 0
+
+    @classmethod
+    def from_json_file(cls, file_path: str) -> ModelManifest:
+        with open(file_path) as f:
+            content = json.load(f)
+
+        manifest = ModelManifest(file_path)
+        highest_id = 0
+        for model_name, model_id in content["model_names"].items():
+            manifest.model_names[model_name] = model_id
+            highest_id = max(model_id, highest_id)
+            manifest.reverse_lookup[model_id] = model_name
+
+        for test_name, test_id in content["test_names"].items():
+            manifest.test_names[test_name] = test_id
+            highest_id = max(test_id, highest_id)
+            manifest.reverse_lookup[test_id] = test_name
+
+        for id, connections in content["connections"].items():
+            for connection in connections:
+                manifest.connections[int(id)].add(int(connection))
+
+        manifest.next_id = highest_id + 1
+        return manifest
+
+    def to_json(self) -> Dict[str, Any]:
+        return dict(
+            connections=self.connections,
+            test_names=self.test_names,
+            model_names=self.model_names,
+        )
 
     @classmethod
     def open(cls, file_path: str) -> ModelManifest:
-        manifest = cls(file_path)
         if os.path.exists(file_path):
-            with open(file_path) as f:
-                content = json.load(f)
-            manifest._load_json(content)
-        return manifest
+            return cls.from_json_file(file_path)
+        return cls(file_path)
 
     @contextmanager
     def write(self) -> Generator[None, None, None]:
         try:
-            yield  # Populate self.tests
+            yield  # allow population via register
         finally:
             with open(self.file_path, mode="w") as f:
-                json.dump(self._to_json(), f)
+                json.dump(self.to_json(), f)
 
     @staticmethod
     def _get_all_model_managers() -> Iterable[BaseManager]:
@@ -87,7 +108,7 @@ class ModelManifest:
                 yield manager
 
     @contextmanager
-    def register(self, test_id: str) -> Generator[None, None, None]:
+    def register(self, test_name: str) -> Generator[None, None, None]:
         with ExitStack() as stack:
             entries = []
 
@@ -102,4 +123,114 @@ class ModelManifest:
             finally:
                 # Overwrite the entire test in place, in case it used to touch a
                 # model and doesn't anymore
-                self.tests[test_id] = entries
+                test_id = self.get_or_create_id(self.test_names, test_name)
+                self.connections[test_id] = set()
+                for key in list(self.connections.keys()):
+                    self.connections[key].remove(test_id)
+
+                for entry in entries:
+                    for model in entry.hits:
+                        model_id = self.get_or_create_id(self.model_names, model.__name__)
+                        self.connections[test_id].add(model_id)
+                        self.connections[model_id].add(test_id)
+
+    def each_hybrid_cloud_test(
+        self, path_refix: Path
+    ) -> Iterator[Tuple[int, HybridCloudTestVisitor]]:
+        for test_node_name, test_id in self.test_names.items():
+            test_file_path: str
+            test_case_name: str
+            test_node_name = test_node_name.split("[")[0]
+            test_file_path, test_case_name = test_node_name.split("::")
+            test_file_path = os.path.abspath(str(path_refix.joinpath(test_file_path)))
+
+            test_visitor = HybridCloudTestVisitor(test_file_path, test_case_name)
+            if test_visitor.exists:
+                yield test_id, test_visitor
+
+
+class HybridCloudTestDecoratorVisitor(ast.NodeVisitor):
+    match_line: Tuple[int, int] | None
+
+    def __init__(self) -> None:
+        self.match_line = None
+        self.stable = False
+
+    def visit_keyword(self, node: ast.keyword) -> Any:
+        if node.arg == "stable":
+            if isinstance(node.value, ast.Constant):
+                self.stable = node.value.value
+
+    def visit_Name(self, node: ast.Name) -> Any:
+        if node.id.endswith("_silo_test"):
+            self.match_line = (node.lineno, node.col_offset - 1)
+        return ast.NodeVisitor.generic_visit(self, node)
+
+    def visit_Attribute(self, node: ast.Attribute) -> Any:
+        pass
+
+
+class HybridCloudTestVisitor(ast.NodeVisitor):
+    import_match_line: Tuple[int, int] | None
+    class_node: ast.ClassDef | None
+    func_match_line: Tuple[int, int] | None
+    decorator_match_line: Tuple[int, int] | None
+
+    def __init__(self, test_file_path: str, test_name: str):
+        self.test_file_path = test_file_path
+        self.test_name = test_name
+        self.target_symbol_parts = test_name.split(".")
+        self.import_match_line = None
+        self.decorator_match_line = None
+        self.func_match_line = None
+        self.class_node = None
+        self.decorator_was_stable = False
+
+    @property
+    def exists(self) -> bool:
+        return os.path.exists(self.test_file_path)
+
+    def load(self) -> None:
+        with open(self.test_file_path) as f:
+            file_ast = ast.parse(f.read())
+            self.visit(file_ast)
+
+    def visit_ImportFrom(self, node: ast.ImportFrom) -> Any:
+        if node.module == "sentry.testutils.silo":
+            for name in node.names:
+                if isinstance(name, ast.alias):
+                    if name.name.endswith("_silo_test"):
+                        self.import_match_line = (node.lineno, node.col_offset)
+
+    def visit_ClassDef(self, node: ast.ClassDef) -> Any:
+        if len(self.target_symbol_parts) == 2 and self.target_symbol_parts[0] == node.name:
+            self.class_node = node
+            self.generic_visit(node)
+            self.class_node = None
+        elif len(self.target_symbol_parts) == 1:
+            if self.target_symbol_parts[-1] == node.name or self.target_symbol_parts[-1] in {
+                e.id for e in node.bases if isinstance(e, ast.Name)
+            }:
+                self.mark_target(node)
+
+    def visit_FunctionDef(self, node: Union[ast.FunctionDef, ast.ClassDef]) -> Any:
+        if self.target_symbol_parts[-1] == node.name:
+            if self.class_node:
+                node = self.class_node
+            elif len(self.target_symbol_parts) != 1:
+                return
+
+            self.mark_target(node)
+            return
+
+        return self.generic_visit(node)
+
+    def mark_target(self, node: Union[ast.FunctionDef, ast.ClassDef]) -> None:
+        self.func_match_line = (node.lineno, node.col_offset)
+        for expr in node.decorator_list:
+            decorator_visitor = HybridCloudTestDecoratorVisitor()
+            decorator_visitor.visit(expr)
+            if decorator_visitor.match_line:
+                self.decorator_match_line = decorator_visitor.match_line
+                self.decorator_was_stable = decorator_visitor.stable
+                break

+ 10 - 2
tests/sentry/api/test_issue_search.py

@@ -23,7 +23,7 @@ from sentry.api.issue_search import (
 from sentry.exceptions import InvalidSearchQuery
 from sentry.models.group import STATUS_QUERY_CHOICES
 from sentry.testutils import TestCase
-from sentry.testutils.silo import control_silo_test
+from sentry.testutils.silo import region_silo_test
 from sentry.types.issues import GROUP_CATEGORY_TO_TYPES, GroupCategory
 
 
@@ -158,6 +158,7 @@ class ParseSearchQueryTest(unittest.TestCase):
         ]
 
 
+@region_silo_test(stable=True)
 class ConvertJavaScriptConsoleTagTest(TestCase):
     def test_valid(self):
         filters = [SearchFilter(SearchKey("empty_stacktrace.js_console"), "=", SearchValue(True))]
@@ -174,6 +175,7 @@ class ConvertJavaScriptConsoleTagTest(TestCase):
             convert_query_values(filters, [self.project], self.user, None)
 
 
+@region_silo_test(stable=True)
 class ConvertQueryValuesTest(TestCase):
     def test_valid_converter(self):
         filters = [SearchFilter(SearchKey("assigned_to"), "=", SearchValue("me"))]
@@ -190,6 +192,7 @@ class ConvertQueryValuesTest(TestCase):
         assert filters[0].value.raw_value == search_val.raw_value
 
 
+@region_silo_test(stable=True)
 class ConvertStatusValueTest(TestCase):
     def test_valid(self):
         for status_string, status_val in STATUS_QUERY_CHOICES.items():
@@ -214,6 +217,7 @@ class ConvertStatusValueTest(TestCase):
             convert_query_values(filters, [self.project], self.user, None)
 
 
+@region_silo_test(stable=True)
 class ConvertActorOrNoneValueTest(TestCase):
     def test_user(self):
         assert convert_actor_or_none_value(
@@ -235,7 +239,7 @@ class ConvertActorOrNoneValueTest(TestCase):
         )
 
 
-@control_silo_test
+@region_silo_test
 class ConvertUserValueTest(TestCase):
     def test_me(self):
         assert convert_user_value(["me"], [self.project], self.user, None) == [self.user]
@@ -248,6 +252,7 @@ class ConvertUserValueTest(TestCase):
         assert convert_user_value(["fake-user"], [], None, None)[0].id == 0
 
 
+@region_silo_test(stable=True)
 class ConvertReleaseValueTest(TestCase):
     def test(self):
         assert convert_release_value(["123"], [self.project], self.user, None) == "123"
@@ -258,6 +263,7 @@ class ConvertReleaseValueTest(TestCase):
         assert convert_release_value(["14.*"], [self.project], self.user, None) == "14.*"
 
 
+@region_silo_test(stable=True)
 class ConvertFirstReleaseValueTest(TestCase):
     def test(self):
         assert convert_first_release_value(["123"], [self.project], self.user, None) == ["123"]
@@ -270,6 +276,7 @@ class ConvertFirstReleaseValueTest(TestCase):
         assert convert_first_release_value(["14.*"], [self.project], self.user, None) == ["14.*"]
 
 
+@region_silo_test(stable=True)
 class ConvertCategoryValueTest(TestCase):
     def test(self):
         with self.feature("organizations:performance-issues"):
@@ -290,6 +297,7 @@ class ConvertCategoryValueTest(TestCase):
                 convert_category_value(["hellboy"], [self.project], self.user, None)
 
 
+@region_silo_test(stable=True)
 class ConvertTypeValueTest(TestCase):
     def test(self):
         with self.feature("organizations:performance-issues"):

Some files were not shown because too many files changed in this diff