test_validate_data.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375
  1. from datetime import datetime, timedelta
  2. import pytest
  3. from sentry.constants import MAX_CULPRIT_LENGTH, MAX_VERSION_LENGTH
  4. from sentry.event_manager import EventManager
  5. def validate_and_normalize(data):
  6. manager = EventManager(data)
  7. manager.normalize()
  8. return manager.get_data()
  9. def test_timestamp():
  10. data = validate_and_normalize({"timestamp": "not-a-timestamp"})
  11. assert len(data["errors"]) == 1
  12. now = datetime.utcnow()
  13. data = validate_and_normalize({"timestamp": now.strftime("%Y-%m-%dT%H:%M:%SZ")})
  14. assert "errors" not in data
  15. future = now + timedelta(minutes=2)
  16. data = validate_and_normalize({"timestamp": future.strftime("%Y-%m-%dT%H:%M:%SZ")})
  17. assert len(data["errors"]) == 1
  18. assert data["errors"][0]["type"] == "future_timestamp"
  19. past = now - timedelta(days=31)
  20. data = validate_and_normalize({"timestamp": past.strftime("%Y-%m-%dT%H:%M:%SZ")})
  21. assert len(data["errors"]) == 1
  22. assert data["errors"][0]["type"] == "past_timestamp"
  23. def test_empty_event_id():
  24. data = validate_and_normalize({"event_id": ""})
  25. assert len(data["event_id"]) == 32
  26. def test_missing_event_id():
  27. data = validate_and_normalize({})
  28. assert len(data["event_id"]) == 32
  29. def test_invalid_event_id():
  30. data = validate_and_normalize({"event_id": "a" * 33})
  31. assert len(data["event_id"]) == 32
  32. assert len(data["errors"]) == 1
  33. assert data["errors"][0]["type"] == "invalid_data"
  34. assert data["errors"][0]["name"] == "event_id"
  35. assert data["errors"][0]["value"] == "a" * 33
  36. data = validate_and_normalize({"event_id": "xyz"})
  37. assert len(data["event_id"]) == 32
  38. assert len(data["errors"]) == 1
  39. assert data["errors"][0]["type"] == "invalid_data"
  40. assert data["errors"][0]["name"] == "event_id"
  41. assert data["errors"][0]["value"] == "xyz"
  42. def test_unknown_attribute():
  43. data = validate_and_normalize({"message": "foo", "foo": "bar"})
  44. assert data["foo"] is None
  45. assert len(data["errors"]) == 1
  46. assert data["errors"][0]["type"] == "invalid_attribute"
  47. assert data["errors"][0]["name"] == "foo"
  48. def test_invalid_interface_name():
  49. data = validate_and_normalize({"message": "foo", "foo.baz": "bar"})
  50. assert data["foo.baz"] is None
  51. assert len(data["errors"]) == 1
  52. assert data["errors"][0]["type"] == "invalid_attribute"
  53. assert data["errors"][0]["name"] == "foo.baz"
  54. def test_invalid_interface_import_path():
  55. data = validate_and_normalize({"message": "foo", "exception2": "bar"})
  56. assert data["exception2"] is None
  57. assert len(data["errors"]) == 1
  58. assert data["errors"][0]["type"] == "invalid_attribute"
  59. assert data["errors"][0]["name"] == "exception2"
  60. def test_does_expand_list():
  61. data = validate_and_normalize(
  62. {
  63. "message": "foo",
  64. "exception": [{"type": "ValueError", "value": "hello world", "module": "foo.bar"}],
  65. }
  66. )
  67. assert "exception" in data
  68. def test_log_level_as_string():
  69. data = validate_and_normalize({"message": "foo", "level": "error"})
  70. assert data["level"] == "error"
  71. def test_log_level_as_int():
  72. data = validate_and_normalize({"message": "foo", "level": 40})
  73. assert data["level"] == "error"
  74. def test_invalid_log_level():
  75. data = validate_and_normalize({"message": "foo", "level": "foobar"})
  76. assert data["level"] == "error"
  77. assert len(data["errors"]) == 1
  78. assert data["errors"][0]["type"] == "invalid_data"
  79. assert data["errors"][0]["name"] == "level"
  80. assert data["errors"][0]["value"] == "foobar"
  81. def test_tags_as_string():
  82. data = validate_and_normalize({"message": "foo", "tags": "bar"})
  83. assert data["tags"] == []
  84. def test_tags_with_spaces():
  85. data = validate_and_normalize({"message": "foo", "tags": {"foo bar": "baz bar"}})
  86. assert data["tags"] == [["foo-bar", "baz bar"]]
  87. def test_tags_out_of_bounds():
  88. data = validate_and_normalize(
  89. {"message": "foo", "tags": {"f" * 33: "value", "foo": "v" * 201, "bar": "value"}}
  90. )
  91. assert data["tags"] == [["bar", "value"], [None, "value"], ["foo", None]]
  92. assert len(data["errors"]) == 2
  93. def test_tags_as_invalid_pair():
  94. data = validate_and_normalize(
  95. {"message": "foo", "tags": [("foo", "bar"), ("biz", "baz", "boz")]}
  96. )
  97. assert len(data["errors"]) == 1
  98. assert data["errors"][0]["type"] == "invalid_data"
  99. assert data["errors"][0]["name"] == "tags.1"
  100. assert data["errors"][0]["value"] == ["biz", "baz", "boz"]
  101. def test_reserved_tags():
  102. data = validate_and_normalize(
  103. {"message": "foo", "tags": [("foo", "bar"), ("release", "abc123")]}
  104. )
  105. assert data["tags"] == [["foo", "bar"]]
  106. def test_tag_value():
  107. data = validate_and_normalize({"message": "foo", "tags": [("foo", "b\nar"), ("biz", "baz")]})
  108. assert data["tags"] == [["foo", None], ["biz", "baz"]]
  109. assert len(data["errors"]) == 1
  110. assert data["errors"][0]["type"] == "invalid_data"
  111. assert data["errors"][0]["name"] == "tags.0.1"
  112. assert data["errors"][0]["value"] == "b\nar"
  113. def test_extra_as_string():
  114. data = validate_and_normalize({"message": "foo", "extra": "bar"})
  115. assert data["extra"] == {}
  116. def test_release_tag_max_len():
  117. release_key = "sentry:release"
  118. release_value = "a" * MAX_VERSION_LENGTH
  119. data = validate_and_normalize({"message": "foo", "tags": [[release_key, release_value]]})
  120. assert "errors" not in data
  121. assert data["tags"] == [[release_key, release_value]]
  122. def test_server_name_too_long():
  123. key = "server_name"
  124. value = "a" * (MAX_CULPRIT_LENGTH + 1)
  125. data = validate_and_normalize({key: value})
  126. assert len(dict(data["tags"])[key]) == MAX_CULPRIT_LENGTH
  127. def test_site_too_long():
  128. key = "site"
  129. value = "a" * (MAX_CULPRIT_LENGTH + 1)
  130. data = validate_and_normalize({key: value})
  131. assert len(dict(data["tags"])[key]) == MAX_CULPRIT_LENGTH
  132. def test_release_too_long():
  133. data = validate_and_normalize({"release": "a" * (MAX_VERSION_LENGTH + 1)})
  134. assert not data.get("release")
  135. assert len(data["errors"]) == 1
  136. assert data["errors"][0]["type"] == "invalid_data"
  137. assert data["errors"][0]["name"] == "release"
  138. assert data["errors"][0]["value"] == "a" * (MAX_VERSION_LENGTH + 1)
  139. def test_release_as_non_string():
  140. data = validate_and_normalize({"release": 42})
  141. assert data["release"] == "42"
  142. def test_distribution_too_long():
  143. dist_len = 201
  144. data = validate_and_normalize({"release": "a" * 62, "dist": "b" * dist_len})
  145. # max dist length since relay-python 0.8.16 = 64 chars, and they started
  146. # return an error instead of truncating
  147. assert not data.get("dist")
  148. assert len(data["errors"]) == 1
  149. assert data["errors"][0]["type"] == "value_too_long"
  150. assert data["errors"][0]["name"] == "dist"
  151. assert data["errors"][0]["value"] == "b" * dist_len
  152. def test_distribution_bad_char():
  153. data = validate_and_normalize({"release": "a" * 62, "dist": "^%"})
  154. assert not data.get("dist")
  155. assert len(data["errors"]) == 1
  156. assert data["errors"][0]["type"] == "invalid_data"
  157. assert data["errors"][0]["name"] == "dist"
  158. assert data["errors"][0]["value"] == "^%"
  159. def test_distribution_strip():
  160. data = validate_and_normalize({"release": "a" * 62, "dist": " foo "})
  161. assert data["dist"] == "foo"
  162. def test_distribution_as_non_string():
  163. data = validate_and_normalize({"release": "42", "dist": 23})
  164. assert data["release"] == "42"
  165. assert data.get("dist") is None
  166. def test_distribution_no_release():
  167. data = validate_and_normalize({"dist": 23})
  168. assert data.get("dist") is None
  169. def test_valid_platform():
  170. data = validate_and_normalize({"platform": "python"})
  171. assert data["platform"] == "python"
  172. def test_no_platform():
  173. data = validate_and_normalize({})
  174. assert data["platform"] == "other"
  175. def test_invalid_platform():
  176. data = validate_and_normalize({"platform": "foobar"})
  177. assert data["platform"] == "other"
  178. def test_environment_too_long():
  179. data = validate_and_normalize({"environment": "a" * 65})
  180. assert not data.get("environment")
  181. (error,) = data["errors"]
  182. assert error["type"] == "invalid_data"
  183. assert error["name"] == "environment"
  184. assert error["value"] == "a" * 65
  185. def test_environment_invalid():
  186. data = validate_and_normalize({"environment": "a/b"})
  187. assert not data.get("environment")
  188. (error,) = data["errors"]
  189. assert error["type"] == "invalid_data"
  190. assert error["name"] == "environment"
  191. assert error["value"] == "a/b"
  192. def test_environment_as_non_string():
  193. data = validate_and_normalize({"environment": 42})
  194. assert data.get("environment") is None
  195. def test_time_spent_too_large():
  196. data = validate_and_normalize({"time_spent": 2147483647 + 1})
  197. assert data.get("time_spent") is None
  198. def test_time_spent_invalid():
  199. data = validate_and_normalize({"time_spent": "lol"})
  200. assert not data.get("time_spent")
  201. assert len(data["errors"]) == 1
  202. assert data["errors"][0]["type"] == "invalid_data"
  203. assert data["errors"][0]["name"] == "time_spent"
  204. assert data["errors"][0]["value"] == "lol"
  205. def test_time_spent_non_int():
  206. data = validate_and_normalize({"time_spent": "123"})
  207. assert data["time_spent"] is None
  208. def test_fingerprints():
  209. data = validate_and_normalize({"fingerprint": "2012-01-01T10:30:45"})
  210. assert not data.get("fingerprint")
  211. assert data["errors"][0]["type"] == "invalid_data"
  212. assert data["errors"][0]["name"] == "fingerprint"
  213. data = validate_and_normalize({"fingerprint": ["foo", ["bar"]]})
  214. assert data["fingerprint"] == ["foo"]
  215. # With rust, there will be errors emitted
  216. data = validate_and_normalize({"fingerprint": ["foo", None, "bar"]})
  217. assert data["fingerprint"] == ["foo", "bar"]
  218. # With rust, there will be errors emitted
  219. data = validate_and_normalize({"fingerprint": ["{{default}}", 1, "bar", 4.5, -2.7, True]})
  220. assert data["fingerprint"] == ["{{default}}", "1", "bar", "4", "-2", "True"]
  221. assert "errors" not in data
  222. data = validate_and_normalize({"fingerprint": ["{{default}}", 1e100, -1e100, 1e10]})
  223. assert data["fingerprint"] == ["{{default}}", "10000000000"]
  224. assert data["errors"] == [
  225. {"type": "invalid_data", "name": "fingerprint", "value": [1e100, -1e100]}
  226. ]
  227. data = validate_and_normalize({"fingerprint": []})
  228. assert "fingerprint" not in data
  229. assert "errors" not in data
  230. data = validate_and_normalize({"fingerprint": [""]})
  231. assert data["fingerprint"] == [""]
  232. assert "errors" not in data
  233. def test_messages():
  234. # Just 'message': wrap it in interface
  235. data = validate_and_normalize({"message": "foo is bar"})
  236. assert data["logentry"] == {"formatted": "foo is bar"}
  237. # both 'message' and interface with no 'formatted' value, put 'message'
  238. # into 'formatted'.
  239. data = validate_and_normalize(
  240. {"message": "foo is bar", "logentry": {"message": "something else"}}
  241. )
  242. assert data["logentry"] == {"formatted": "something else"}
  243. # both 'message' and complete interface, 'message' is discarded
  244. data = validate_and_normalize(
  245. {
  246. "message": "foo is bar",
  247. "logentry": {"message": "something else", "formatted": "something else formatted"},
  248. }
  249. )
  250. assert "errors" not in data
  251. assert data["logentry"] == {
  252. "message": "something else",
  253. "formatted": "something else formatted",
  254. }
  255. @pytest.mark.skip(reason="Message behavior that didn't make a lot of sense.")
  256. def test_messages_old_behavior():
  257. # both 'message' and complete valid interface but interface has the same
  258. # value for both keys so the 'formatted' value is discarded and ends up
  259. # being replaced with 'message'
  260. data = validate_and_normalize(
  261. {
  262. "message": "foo is bar",
  263. "logentry": {"message": "something else", "formatted": "something else"},
  264. }
  265. )
  266. assert "message" not in data
  267. assert "errors" not in data
  268. assert data["logentry"] == {"message": "something else", "formatted": "foo is bar"}
  269. # interface discarded as invalid, replaced by new interface containing
  270. # wrapped 'message'
  271. data = validate_and_normalize({"message": "foo is bar", "logentry": {"invalid": "invalid"}})
  272. assert "message" not in data
  273. assert len(data["errors"]) == 1
  274. assert data["logentry"] == {"message": "foo is bar"}