test_validate_data.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. from datetime import timedelta
  2. import pytest
  3. from django.utils import timezone
  4. from sentry.constants import MAX_CULPRIT_LENGTH, MAX_VERSION_LENGTH
  5. from sentry.event_manager import EventManager
  6. def validate_and_normalize(data):
  7. manager = EventManager(data)
  8. manager.normalize()
  9. return manager.get_data()
  10. def test_timestamp():
  11. data = validate_and_normalize({"timestamp": "not-a-timestamp"})
  12. assert len(data["errors"]) == 1
  13. now = timezone.now()
  14. data = validate_and_normalize({"timestamp": now.strftime("%Y-%m-%dT%H:%M:%SZ")})
  15. assert "errors" not in data
  16. future = now + timedelta(minutes=2)
  17. data = validate_and_normalize({"timestamp": future.strftime("%Y-%m-%dT%H:%M:%SZ")})
  18. assert len(data["errors"]) == 1
  19. assert data["errors"][0]["type"] == "future_timestamp"
  20. past = now - timedelta(days=31)
  21. data = validate_and_normalize({"timestamp": past.strftime("%Y-%m-%dT%H:%M:%SZ")})
  22. assert len(data["errors"]) == 1
  23. assert data["errors"][0]["type"] == "past_timestamp"
  24. def test_empty_event_id():
  25. data = validate_and_normalize({"event_id": ""})
  26. assert len(data["event_id"]) == 32
  27. def test_missing_event_id():
  28. data = validate_and_normalize({})
  29. assert len(data["event_id"]) == 32
  30. def test_invalid_event_id():
  31. data = validate_and_normalize({"event_id": "a" * 33})
  32. assert len(data["event_id"]) == 32
  33. assert len(data["errors"]) == 1
  34. assert data["errors"][0]["type"] == "invalid_data"
  35. assert data["errors"][0]["name"] == "event_id"
  36. assert data["errors"][0]["value"] == "a" * 33
  37. data = validate_and_normalize({"event_id": "xyz"})
  38. assert len(data["event_id"]) == 32
  39. assert len(data["errors"]) == 1
  40. assert data["errors"][0]["type"] == "invalid_data"
  41. assert data["errors"][0]["name"] == "event_id"
  42. assert data["errors"][0]["value"] == "xyz"
  43. def test_unknown_attribute():
  44. data = validate_and_normalize({"message": "foo", "foo": "bar"})
  45. assert data["foo"] is None
  46. assert len(data["errors"]) == 1
  47. assert data["errors"][0]["type"] == "invalid_attribute"
  48. assert data["errors"][0]["name"] == "foo"
  49. def test_invalid_interface_name():
  50. data = validate_and_normalize({"message": "foo", "foo.baz": "bar"})
  51. assert data["foo.baz"] is None
  52. assert len(data["errors"]) == 1
  53. assert data["errors"][0]["type"] == "invalid_attribute"
  54. assert data["errors"][0]["name"] == "foo.baz"
  55. def test_invalid_interface_import_path():
  56. data = validate_and_normalize({"message": "foo", "exception2": "bar"})
  57. assert data["exception2"] is None
  58. assert len(data["errors"]) == 1
  59. assert data["errors"][0]["type"] == "invalid_attribute"
  60. assert data["errors"][0]["name"] == "exception2"
  61. def test_does_expand_list():
  62. data = validate_and_normalize(
  63. {
  64. "message": "foo",
  65. "exception": [{"type": "ValueError", "value": "hello world", "module": "foo.bar"}],
  66. }
  67. )
  68. assert "exception" in data
  69. def test_log_level_as_string():
  70. data = validate_and_normalize({"message": "foo", "level": "error"})
  71. assert data["level"] == "error"
  72. def test_log_level_as_int():
  73. data = validate_and_normalize({"message": "foo", "level": 40})
  74. assert data["level"] == "error"
  75. def test_invalid_log_level():
  76. data = validate_and_normalize({"message": "foo", "level": "foobar"})
  77. assert data["level"] == "error"
  78. assert len(data["errors"]) == 1
  79. assert data["errors"][0]["type"] == "invalid_data"
  80. assert data["errors"][0]["name"] == "level"
  81. assert data["errors"][0]["value"] == "foobar"
  82. def test_tags_as_string():
  83. data = validate_and_normalize({"message": "foo", "tags": "bar"})
  84. assert data["tags"] == []
  85. def test_tags_with_spaces():
  86. data = validate_and_normalize({"message": "foo", "tags": {"foo bar": "baz bar"}})
  87. assert data["tags"] == [["foo-bar", "baz bar"]]
  88. def test_tags_out_of_bounds():
  89. data = validate_and_normalize(
  90. {"message": "foo", "tags": {"f" * 201: "value", "foo": "v" * 201, "bar": "value"}}
  91. )
  92. assert data["tags"] == [["bar", "value"], [None, "value"], ["foo", None]]
  93. assert len(data["errors"]) == 2
  94. def test_tags_as_invalid_pair():
  95. data = validate_and_normalize(
  96. {"message": "foo", "tags": [("foo", "bar"), ("biz", "baz", "boz")]}
  97. )
  98. assert len(data["errors"]) == 1
  99. assert data["errors"][0]["type"] == "invalid_data"
  100. assert data["errors"][0]["name"] == "tags.1"
  101. assert data["errors"][0]["value"] == ["biz", "baz", "boz"]
  102. def test_reserved_tags():
  103. data = validate_and_normalize(
  104. {"message": "foo", "tags": [("foo", "bar"), ("release", "abc123")]}
  105. )
  106. assert data["tags"] == [["foo", "bar"]]
  107. def test_tag_value():
  108. data = validate_and_normalize({"message": "foo", "tags": [("foo", "b\nar"), ("biz", "baz")]})
  109. assert data["tags"] == [["foo", None], ["biz", "baz"]]
  110. assert len(data["errors"]) == 1
  111. assert data["errors"][0]["type"] == "invalid_data"
  112. assert data["errors"][0]["name"] == "tags.0.1"
  113. assert data["errors"][0]["value"] == "b\nar"
  114. def test_extra_as_string():
  115. data = validate_and_normalize({"message": "foo", "extra": "bar"})
  116. assert data["extra"] == {}
  117. def test_release_tag_max_len():
  118. release_key = "sentry:release"
  119. release_value = "a" * MAX_VERSION_LENGTH
  120. data = validate_and_normalize({"message": "foo", "tags": [[release_key, release_value]]})
  121. assert "errors" not in data
  122. assert data["tags"] == [[release_key, release_value]]
  123. def test_server_name_too_long():
  124. key = "server_name"
  125. value = "a" * (MAX_CULPRIT_LENGTH + 1)
  126. data = validate_and_normalize({key: value})
  127. assert len(dict(data["tags"])[key]) == MAX_CULPRIT_LENGTH
  128. def test_site_too_long():
  129. key = "site"
  130. value = "a" * (MAX_CULPRIT_LENGTH + 1)
  131. data = validate_and_normalize({key: value})
  132. assert len(dict(data["tags"])[key]) == MAX_CULPRIT_LENGTH
  133. def test_release_too_long():
  134. data = validate_and_normalize({"release": "a" * (MAX_VERSION_LENGTH + 1)})
  135. assert not data.get("release")
  136. assert len(data["errors"]) == 1
  137. assert data["errors"][0]["type"] == "invalid_data"
  138. assert data["errors"][0]["name"] == "release"
  139. assert data["errors"][0]["value"] == "a" * (MAX_VERSION_LENGTH + 1)
  140. def test_release_as_non_string():
  141. data = validate_and_normalize({"release": 42})
  142. assert data["release"] == "42"
  143. def test_distribution_too_long():
  144. dist_len = 201
  145. data = validate_and_normalize({"release": "a" * 62, "dist": "b" * dist_len})
  146. # max dist length since relay-python 0.8.16 = 64 chars, and they started
  147. # return an error instead of truncating
  148. assert not data.get("dist")
  149. assert len(data["errors"]) == 1
  150. assert data["errors"][0]["type"] == "value_too_long"
  151. assert data["errors"][0]["name"] == "dist"
  152. assert data["errors"][0]["value"] == "b" * dist_len
  153. def test_distribution_bad_char():
  154. data = validate_and_normalize({"release": "a" * 62, "dist": "^%"})
  155. assert not data.get("dist")
  156. assert len(data["errors"]) == 1
  157. assert data["errors"][0]["type"] == "invalid_data"
  158. assert data["errors"][0]["name"] == "dist"
  159. assert data["errors"][0]["value"] == "^%"
  160. def test_distribution_strip():
  161. data = validate_and_normalize({"release": "a" * 62, "dist": " foo "})
  162. assert data["dist"] == "foo"
  163. def test_distribution_as_non_string():
  164. data = validate_and_normalize({"release": "42", "dist": 23})
  165. assert data["release"] == "42"
  166. assert data.get("dist") is None
  167. def test_distribution_no_release():
  168. data = validate_and_normalize({"dist": 23})
  169. assert data.get("dist") is None
  170. def test_valid_platform():
  171. data = validate_and_normalize({"platform": "python"})
  172. assert data["platform"] == "python"
  173. def test_no_platform():
  174. data = validate_and_normalize({})
  175. assert data["platform"] == "other"
  176. def test_invalid_platform():
  177. data = validate_and_normalize({"platform": "foobar"})
  178. assert data["platform"] == "other"
  179. def test_environment_too_long():
  180. data = validate_and_normalize({"environment": "a" * 65})
  181. assert not data.get("environment")
  182. (error,) = data["errors"]
  183. assert error["type"] == "invalid_data"
  184. assert error["name"] == "environment"
  185. assert error["value"] == "a" * 65
  186. def test_environment_invalid():
  187. data = validate_and_normalize({"environment": "a/b"})
  188. assert not data.get("environment")
  189. (error,) = data["errors"]
  190. assert error["type"] == "invalid_data"
  191. assert error["name"] == "environment"
  192. assert error["value"] == "a/b"
  193. def test_environment_as_non_string():
  194. data = validate_and_normalize({"environment": 42})
  195. assert data.get("environment") is None
  196. def test_time_spent_too_large():
  197. data = validate_and_normalize({"time_spent": 2147483647 + 1})
  198. assert data.get("time_spent") is None
  199. def test_time_spent_invalid():
  200. data = validate_and_normalize({"time_spent": "lol"})
  201. assert not data.get("time_spent")
  202. assert len(data["errors"]) == 1
  203. assert data["errors"][0]["type"] == "invalid_data"
  204. assert data["errors"][0]["name"] == "time_spent"
  205. assert data["errors"][0]["value"] == "lol"
  206. def test_time_spent_non_int():
  207. data = validate_and_normalize({"time_spent": "123"})
  208. assert data["time_spent"] is None
  209. def test_fingerprints():
  210. data = validate_and_normalize({"fingerprint": "2012-01-01T10:30:45"})
  211. assert not data.get("fingerprint")
  212. assert data["errors"][0]["type"] == "invalid_data"
  213. assert data["errors"][0]["name"] == "fingerprint"
  214. data = validate_and_normalize({"fingerprint": ["foo", ["bar"]]})
  215. assert data["fingerprint"] == ["foo"]
  216. # With rust, there will be errors emitted
  217. data = validate_and_normalize({"fingerprint": ["foo", None, "bar"]})
  218. assert data["fingerprint"] == ["foo", "bar"]
  219. # With rust, there will be errors emitted
  220. data = validate_and_normalize({"fingerprint": ["{{default}}", 1, "bar", 4.5, -2.7, True]})
  221. assert data["fingerprint"] == ["{{default}}", "1", "bar", "4", "-2", "True"]
  222. assert "errors" not in data
  223. data = validate_and_normalize({"fingerprint": ["{{default}}", 1e100, -1e100, 1e10]})
  224. assert data["fingerprint"] == ["{{default}}", "10000000000"]
  225. assert data["errors"] == [
  226. {"type": "invalid_data", "name": "fingerprint", "value": [1e100, -1e100]}
  227. ]
  228. data = validate_and_normalize({"fingerprint": []})
  229. assert "fingerprint" not in data
  230. assert "errors" not in data
  231. data = validate_and_normalize({"fingerprint": [""]})
  232. assert data["fingerprint"] == [""]
  233. assert "errors" not in data
  234. def test_messages():
  235. # Just 'message': wrap it in interface
  236. data = validate_and_normalize({"message": "foo is bar"})
  237. assert data["logentry"] == {"formatted": "foo is bar"}
  238. # both 'message' and interface with no 'formatted' value, put 'message'
  239. # into 'formatted'.
  240. data = validate_and_normalize(
  241. {"message": "foo is bar", "logentry": {"message": "something else"}}
  242. )
  243. assert data["logentry"] == {"formatted": "something else"}
  244. # both 'message' and complete interface, 'message' is discarded
  245. data = validate_and_normalize(
  246. {
  247. "message": "foo is bar",
  248. "logentry": {"message": "something else", "formatted": "something else formatted"},
  249. }
  250. )
  251. assert "errors" not in data
  252. assert data["logentry"] == {
  253. "message": "something else",
  254. "formatted": "something else formatted",
  255. }
  256. @pytest.mark.skip(reason="Message behavior that didn't make a lot of sense.")
  257. def test_messages_old_behavior():
  258. # both 'message' and complete valid interface but interface has the same
  259. # value for both keys so the 'formatted' value is discarded and ends up
  260. # being replaced with 'message'
  261. data = validate_and_normalize(
  262. {
  263. "message": "foo is bar",
  264. "logentry": {"message": "something else", "formatted": "something else"},
  265. }
  266. )
  267. assert "message" not in data
  268. assert "errors" not in data
  269. assert data["logentry"] == {"message": "something else", "formatted": "foo is bar"}
  270. # interface discarded as invalid, replaced by new interface containing
  271. # wrapped 'message'
  272. data = validate_and_normalize({"message": "foo is bar", "logentry": {"invalid": "invalid"}})
  273. assert "message" not in data
  274. assert len(data["errors"]) == 1
  275. assert data["logentry"] == {"message": "foo is bar"}