test_span_desc_clusterer.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334
  1. from unittest import mock
  2. from sentry.ingest.transaction_clusterer import ClustererNamespace
  3. from sentry.ingest.transaction_clusterer.base import ReplacementRule
  4. from sentry.ingest.transaction_clusterer.datasource.redis import (
  5. _record_sample,
  6. clear_samples,
  7. get_active_projects,
  8. get_span_descriptions,
  9. record_span_descriptions,
  10. )
  11. from sentry.ingest.transaction_clusterer.meta import get_clusterer_meta
  12. from sentry.ingest.transaction_clusterer.rules import (
  13. ProjectOptionRuleStore,
  14. get_rules,
  15. get_sorted_rules,
  16. update_rules,
  17. )
  18. from sentry.ingest.transaction_clusterer.tasks import (
  19. cluster_projects_span_descs,
  20. spawn_clusterers_span_descs,
  21. )
  22. from sentry.models.organization import Organization
  23. from sentry.models.project import Project
  24. from sentry.testutils.helpers.datetime import freeze_time
  25. from sentry.testutils.helpers.features import Feature
  26. from sentry.testutils.helpers.options import override_options
  27. from sentry.testutils.pytest.fixtures import django_db_all
  28. @mock.patch("sentry.ingest.transaction_clusterer.datasource.redis.MAX_SET_SIZE", 5)
  29. def test_collection():
  30. org = Organization(pk=666)
  31. project1 = Project(id=101, name="p1", organization=org)
  32. project2 = Project(id=102, name="project2", organization=org)
  33. for project in (project1, project2):
  34. for i in range(len(project.name)):
  35. _record_sample(ClustererNamespace.SPANS, project, f"span.desc-{project.name}-{i}")
  36. _record_sample(ClustererNamespace.SPANS, project, f"span.desc-{project.name}-{i}")
  37. set_entries1 = set(get_span_descriptions(project1))
  38. assert set_entries1 == {"span.desc-p1-0", "span.desc-p1-1"}
  39. set_entries2 = set(get_span_descriptions(project2))
  40. assert len(set_entries2) == 5, set_entries2
  41. # We don't know which entries made it into the final set:
  42. for name in set_entries2:
  43. assert name.startswith("span.desc-project2-")
  44. project3 = Project(id=103, name="project3", organization=Organization(pk=66))
  45. assert set() == set(get_span_descriptions(project3))
  46. def test_clear_redis():
  47. project = Project(id=101, name="p1", organization=Organization(pk=66))
  48. _record_sample(ClustererNamespace.SPANS, project, "foo")
  49. assert set(get_span_descriptions(project)) == {"foo"}
  50. clear_samples(ClustererNamespace.SPANS, project)
  51. assert set(get_span_descriptions(project)) == set()
  52. # Deleting for a none-existing project does not crash:
  53. project2 = Project(id=666, name="project2", organization=Organization(pk=66))
  54. clear_samples(ClustererNamespace.SPANS, project2)
  55. @mock.patch("sentry.ingest.transaction_clusterer.datasource.redis.MAX_SET_SIZE", 100)
  56. def test_distribution():
  57. """Make sure that the redis set prefers newer entries"""
  58. project = Project(id=103, name="", organization=Organization(pk=66))
  59. for i in range(1000):
  60. _record_sample(ClustererNamespace.SPANS, project, str(i))
  61. freshness = sum(map(int, get_span_descriptions(project))) / 100
  62. # The average is usually around ~900, check for > 800 to be on the safe side
  63. assert freshness > 800, freshness
  64. @mock.patch("sentry.ingest.transaction_clusterer.datasource.redis._record_sample")
  65. @django_db_all
  66. def test_record_span_desc_url(mocked_record, default_organization):
  67. with Feature(
  68. {
  69. "projects:span-metrics-extraction": True,
  70. }
  71. ):
  72. project = Project(id=111, name="project", organization_id=default_organization.id)
  73. record_span_descriptions(
  74. project,
  75. {
  76. "spans": [
  77. {
  78. "op": "resource.css",
  79. "description": "https://www.domain.com/jane/path/to/something.en-us.js;p?q=s",
  80. },
  81. {
  82. "op": "resource.css",
  83. "description": "webroot/my.js",
  84. },
  85. ],
  86. },
  87. )
  88. assert mocked_record.mock_calls == [
  89. mock.call(
  90. ClustererNamespace.SPANS,
  91. Project(id=111, name="project", slug=None),
  92. "www.domain.com/jane/path/to/something.en-us.js",
  93. ),
  94. mock.call(
  95. ClustererNamespace.SPANS,
  96. Project(id=111, name="project", slug=None),
  97. "webroot/my.js",
  98. ),
  99. ]
  100. def test_sort_rules():
  101. rules = {
  102. ReplacementRule("/a/*/**"): 1,
  103. ReplacementRule("/a/**"): 2,
  104. ReplacementRule("/a/*/c/**"): 3,
  105. }
  106. assert ProjectOptionRuleStore(ClustererNamespace.SPANS)._sort(rules) == [
  107. ("/a/*/c/**", 3),
  108. ("/a/*/**", 1),
  109. ("/a/**", 2),
  110. ]
  111. @mock.patch("sentry.ingest.transaction_clusterer.rules.CompositeRuleStore.MERGE_MAX_RULES", 2)
  112. @django_db_all
  113. def test_max_rule_threshold_merge_composite_store(default_project):
  114. assert len(get_sorted_rules(ClustererNamespace.SPANS, default_project)) == 0
  115. with freeze_time("2000-01-01 01:00:00"):
  116. update_rules(ClustererNamespace.SPANS, default_project, [ReplacementRule("foo/foo")])
  117. update_rules(ClustererNamespace.SPANS, default_project, [ReplacementRule("bar/bar")])
  118. assert get_sorted_rules(ClustererNamespace.SPANS, default_project) == [
  119. ("foo/foo", 946688400),
  120. ("bar/bar", 946688400),
  121. ]
  122. with freeze_time("2000-01-01 02:00:00"):
  123. update_rules(ClustererNamespace.SPANS, default_project, [ReplacementRule("baz/baz")])
  124. assert len(get_sorted_rules(ClustererNamespace.SPANS, default_project)) == 2
  125. update_rules(ClustererNamespace.SPANS, default_project, [ReplacementRule("qux/qux")])
  126. assert len(get_sorted_rules(ClustererNamespace.SPANS, default_project)) == 2
  127. assert get_sorted_rules(ClustererNamespace.SPANS, default_project) == [
  128. ("baz/baz", 946692000),
  129. ("qux/qux", 946692000),
  130. ]
  131. @django_db_all
  132. def test_save_rules(default_project):
  133. project = default_project
  134. project_rules = get_rules(ClustererNamespace.SPANS, project)
  135. assert project_rules == {}
  136. with freeze_time("2012-01-14 12:00:01"):
  137. assert 2 == update_rules(
  138. ClustererNamespace.SPANS,
  139. default_project,
  140. [ReplacementRule("foo"), ReplacementRule("bar")],
  141. )
  142. project_rules = get_rules(ClustererNamespace.SPANS, project)
  143. assert project_rules == {"foo": 1326542401, "bar": 1326542401}
  144. with freeze_time("2012-01-14 12:00:02"):
  145. assert 1 == update_rules(
  146. ClustererNamespace.SPANS,
  147. default_project,
  148. [ReplacementRule("bar"), ReplacementRule("zap")],
  149. )
  150. project_rules = get_rules(ClustererNamespace.SPANS, project)
  151. assert {"bar": 1326542402, "foo": 1326542401, "zap": 1326542402}
  152. # From the test -- number of transactions: 30 == 10 * 2 + 5 * 2
  153. @mock.patch("sentry.ingest.transaction_clusterer.datasource.redis.MAX_SET_SIZE", 30)
  154. @mock.patch("sentry.ingest.transaction_clusterer.tasks.MERGE_THRESHOLD_SPANS", 5)
  155. @mock.patch(
  156. "sentry.ingest.transaction_clusterer.tasks.cluster_projects_span_descs.delay",
  157. wraps=cluster_projects_span_descs, # call immediately
  158. )
  159. @django_db_all
  160. @freeze_time("2000-01-01 01:00:00")
  161. def test_run_clusterer_task(cluster_projects_span_descs, default_organization):
  162. def _add_mock_data(proj, number):
  163. for i in range(0, number):
  164. _record_sample(ClustererNamespace.SPANS, proj, f"/user/span.desc-{proj.name}-{i}")
  165. _record_sample(ClustererNamespace.SPANS, proj, f"/org/span.desc-{proj.name}-{i}")
  166. with Feature({"projects:span-metrics-extraction", True}):
  167. project1 = Project(id=123, name="project1", organization_id=default_organization.id)
  168. project2 = Project(id=223, name="project2", organization_id=default_organization.id)
  169. for project in (project1, project2):
  170. project.save()
  171. _add_mock_data(project, 4)
  172. assert (
  173. get_clusterer_meta(ClustererNamespace.SPANS, project1)
  174. == get_clusterer_meta(ClustererNamespace.SPANS, project2)
  175. == {"first_run": 0, "last_run": 0, "runs": 0}
  176. )
  177. spawn_clusterers_span_descs()
  178. assert cluster_projects_span_descs.call_count == 1
  179. cluster_projects_span_descs.reset_mock()
  180. # Not stored enough transactions yet
  181. assert get_rules(ClustererNamespace.SPANS, project1) == {}
  182. assert get_rules(ClustererNamespace.SPANS, project2) == {}
  183. assert (
  184. get_clusterer_meta(ClustererNamespace.SPANS, project1)
  185. == get_clusterer_meta(ClustererNamespace.SPANS, project2)
  186. == {"first_run": 946688400, "last_run": 946688400, "runs": 1}
  187. )
  188. # Clear transactions if batch minimum is not met
  189. assert list(get_span_descriptions(project1)) == []
  190. assert list(get_span_descriptions(project2)) == []
  191. _add_mock_data(project1, 10)
  192. _add_mock_data(project2, 10)
  193. # add more span descriptions to the project 1
  194. for i in range(5):
  195. _record_sample(
  196. ClustererNamespace.SPANS, project1, f"/users/spans.desc/span-{project1.id}-{i}"
  197. )
  198. _record_sample(ClustererNamespace.SPANS, project1, f"/test/path/{i}")
  199. # Add a transaction to project2 so it runs again
  200. _record_sample(ClustererNamespace.SPANS, project2, "foo")
  201. with mock.patch(
  202. "sentry.ingest.transaction_clusterer.tasks.PROJECTS_PER_TASK", 1
  203. ), freeze_time("2000-01-01 01:00:01"):
  204. spawn_clusterers_span_descs()
  205. # One project per batch now:
  206. assert cluster_projects_span_descs.call_count == 2, cluster_projects_span_descs.call_args
  207. rules = get_rules(ClustererNamespace.SPANS, project1)
  208. assert rules.keys() == {
  209. "/org/*/**",
  210. "/user/*/**",
  211. "/test/path/*/**",
  212. "/users/spans.desc/*/**",
  213. }
  214. assert (
  215. get_clusterer_meta(ClustererNamespace.SPANS, project1)
  216. == get_clusterer_meta(ClustererNamespace.SPANS, project2)
  217. == {"first_run": 946688400, "last_run": 946688401, "runs": 2}
  218. )
  219. @mock.patch("sentry.ingest.transaction_clusterer.datasource.redis.MAX_SET_SIZE", 2)
  220. @mock.patch("sentry.ingest.transaction_clusterer.tasks.MERGE_THRESHOLD_SPANS", 2)
  221. @mock.patch("sentry.ingest.transaction_clusterer.rules.update_rules")
  222. @django_db_all
  223. def test_clusterer_only_runs_when_enough_data(mock_update_rules, default_project):
  224. project = default_project
  225. assert get_rules(ClustererNamespace.SPANS, project) == {}
  226. _record_sample(ClustererNamespace.SPANS, project, "/span-desc/number/1")
  227. cluster_projects_span_descs([project])
  228. # Clusterer didn't create rules. Still, it updates the stores.
  229. assert mock_update_rules.call_count == 1
  230. assert mock_update_rules.call_args == mock.call(ClustererNamespace.SPANS, project, [])
  231. # Transaction names are deleted if there aren't enough
  232. assert get_rules(ClustererNamespace.SPANS, project) == {}
  233. _record_sample(ClustererNamespace.SPANS, project, "http://domain.com/span-desc/number/1")
  234. _record_sample(ClustererNamespace.SPANS, project, "http://domain.com/span-desc/number/2")
  235. cluster_projects_span_descs([project])
  236. assert mock_update_rules.call_count == 2
  237. assert mock_update_rules.call_args == mock.call(
  238. ClustererNamespace.SPANS, project, ["http://domain.com/span-desc/number/*/**"]
  239. )
  240. @django_db_all
  241. def test_get_deleted_project():
  242. deleted_project = Project(pk=666, organization=Organization(pk=666))
  243. _record_sample(ClustererNamespace.SPANS, deleted_project, "foo")
  244. assert list(get_active_projects(ClustererNamespace.SPANS)) == []
  245. @django_db_all
  246. def test_record_span_descriptions_no_databag(default_organization):
  247. """Verify a `None` databag doesn't break the span description clusterer."""
  248. with Feature("projects:span-metrics-extraction"), override_options(
  249. {"span_descs.bump-lifetime-sample-rate": 1.0}
  250. ):
  251. payload = {
  252. "spans": [
  253. {
  254. "op": "resource.css",
  255. },
  256. ],
  257. }
  258. project = Project(id=123, name="project", organization_id=default_organization.id)
  259. record_span_descriptions(project, payload)
  260. @django_db_all
  261. def test_stale_rules_arent_saved(default_project):
  262. assert len(get_sorted_rules(ClustererNamespace.SPANS, default_project)) == 0
  263. with freeze_time("2000-01-01 01:00:00"):
  264. update_rules(ClustererNamespace.SPANS, default_project, [ReplacementRule("foo/foo")])
  265. assert get_sorted_rules(ClustererNamespace.SPANS, default_project) == [("foo/foo", 946688400)]
  266. with freeze_time("2000-02-02 02:00:00"):
  267. update_rules(ClustererNamespace.SPANS, default_project, [ReplacementRule("bar/bar")])
  268. assert get_sorted_rules(ClustererNamespace.SPANS, default_project) == [
  269. ("bar/bar", 949456800),
  270. ("foo/foo", 946688400),
  271. ]
  272. with freeze_time("2001-01-01 01:00:00"):
  273. update_rules(ClustererNamespace.SPANS, default_project, [ReplacementRule("baz/baz")])
  274. assert get_sorted_rules(ClustererNamespace.SPANS, default_project) == [("baz/baz", 978310800)]