# serializers.py
  1. from typing import Dict, List, Tuple, Union
  2. from urllib.parse import urlparse
  3. from django.db import transaction
  4. from django.db.utils import IntegrityError
  5. from ipware import get_client_ip
  6. from anonymizeip import anonymize_ip
  7. from rest_framework import serializers
  8. from rest_framework.exceptions import PermissionDenied
  9. from sentry.eventtypes.error import ErrorEvent
  10. from sentry.eventtypes.base import DefaultEvent
  11. from issues.models import EventType, Issue
  12. from issues.serializers import BaseBreadcrumbsSerializer
  13. from issues.tasks import update_search_index_issue
  14. from environments.models import Environment
  15. from releases.models import Release
  16. from glitchtip.serializers import FlexibleDateTimeField
  17. from .models import Event, LogLevel
  18. from .fields import (
  19. GenericField,
  20. ForgivingHStoreField,
  21. ForgivingDisallowRegexField,
  22. QueryStringField,
  23. )
  24. from .event_tag_processors import TAG_PROCESSORS
  25. from .event_context_processors import EVENT_CONTEXT_PROCESSORS
  26. def replace(data: Union[str, dict, list], match: str, repl: str):
  27. """ A recursive replace function """
  28. if isinstance(data, dict):
  29. return {k: replace(v, match, repl) for k, v in data.items()}
  30. elif isinstance(data, list):
  31. return [replace(i, match, repl) for i in data]
  32. elif isinstance(data, str):
  33. return data.replace(match, repl)
  34. return data
  35. def sanitize_bad_postgres_chars(data: Union[str, dict, list]):
  36. """
  37. Remove values which are not supported by the postgres string data types
  38. """
  39. known_bads = ["\x00"]
  40. for known_bad in known_bads:
  41. data = data.replace(known_bad, " ")
  42. return data
  43. def sanitize_bad_postgres_json(data: Union[str, dict, list]):
  44. """
  45. Remove values which are not supported by the postgres JSONB data type
  46. """
  47. known_bads = ["\u0000"]
  48. for known_bad in known_bads:
  49. data = replace(data, known_bad, " ")
  50. return data
  51. class RequestSerializer(serializers.Serializer):
  52. env = serializers.DictField(
  53. child=serializers.CharField(allow_blank=True, allow_null=True), required=False
  54. )
  55. # Dict values can be both str and List[str]
  56. headers = serializers.DictField(required=False)
  57. url = serializers.CharField(required=False, allow_blank=True)
  58. method = serializers.CharField(required=False, allow_blank=True)
  59. query_string = QueryStringField(required=False, allow_null=True)
  60. class BreadcrumbsSerializer(BaseBreadcrumbsSerializer):
  61. timestamp = GenericField(required=False)
  62. def validate_level(self, value):
  63. if value == "log":
  64. return "info"
  65. return value
  66. class BaseSerializer(serializers.Serializer):
  67. def process_user(self, project, data):
  68. """ Fetch user data from SDK event and request """
  69. user = data.get("user", {})
  70. if self.context and self.context.get("request"):
  71. client_ip, is_routable = get_client_ip(self.context["request"])
  72. if user or is_routable:
  73. if is_routable:
  74. if project.should_scrub_ip_addresses:
  75. client_ip = anonymize_ip(client_ip)
  76. user["ip_address"] = client_ip
  77. return user
  78. class SentrySDKEventSerializer(BaseSerializer):
  79. """ Represents events coming from a OSS sentry SDK client """
  80. breadcrumbs = serializers.JSONField(required=False)
  81. tags = ForgivingHStoreField(required=False)
  82. event_id = serializers.UUIDField()
  83. extra = serializers.JSONField(required=False)
  84. request = RequestSerializer(required=False)
  85. server_name = serializers.CharField(required=False)
  86. sdk = serializers.JSONField(required=False)
  87. platform = serializers.CharField(required=False)
  88. release = serializers.CharField(required=False, allow_null=True)
  89. environment = ForgivingDisallowRegexField(
  90. required=False, allow_null=True, disallow_regex=r"^[^\n\r\f\/]*$"
  91. )
  92. _meta = serializers.JSONField(required=False)
  93. class FormattedMessageSerializer(serializers.Serializer):
  94. formatted = serializers.CharField(
  95. required=False
  96. ) # Documented as required, but some Sentry SDKs don't send it
  97. messages = serializers.CharField(required=False)
  98. params = serializers.ListField(child=serializers.CharField(), required=False)
  99. def to_internal_value(self, data):
  100. value = super().to_internal_value(data)
  101. return value.get("formatted", "")
  102. class MessageField(serializers.CharField):
  103. def to_internal_value(self, data):
  104. if isinstance(data, dict):
  105. serializer = FormattedMessageSerializer(data=data)
  106. serializer.is_valid(raise_exception=True)
  107. return serializer.validated_data
  108. return super().to_internal_value(data)
  109. class StoreDefaultSerializer(SentrySDKEventSerializer):
  110. """
  111. Default serializer. Used as both a base class and for default error types
  112. """
  113. type = EventType.DEFAULT
  114. contexts = serializers.JSONField(required=False)
  115. level = serializers.CharField(required=False)
  116. logentry = serializers.JSONField(required=False)
  117. message = MessageField(required=False, allow_blank=True, allow_null=True)
  118. timestamp = FlexibleDateTimeField(required=False)
  119. transaction = serializers.CharField(
  120. required=False, allow_null=True, allow_blank=True
  121. )
  122. user = serializers.JSONField(required=False)
  123. modules = serializers.JSONField(required=False)
  124. def validate_breadcrumbs(self, value):
  125. """
  126. Normalize breadcrumbs, which may come in as dict or list
  127. """
  128. if isinstance(value, list):
  129. value = {"values": value}
  130. if value.get("values") == []:
  131. return None
  132. serializer = BreadcrumbsSerializer(data=value.get("values"), many=True)
  133. if serializer.is_valid():
  134. return {"values": serializer.validated_data}
  135. return value
  136. def get_eventtype(self):
  137. """ Get event type class from self.type """
  138. if self.type is EventType.DEFAULT:
  139. return DefaultEvent()
  140. if self.type is EventType.ERROR:
  141. return ErrorEvent()
  142. def modify_exception(self, exception):
  143. """ OSS Sentry does this, I have no idea why """
  144. if exception:
  145. for value in exception.get("values", []):
  146. value.pop("module", None)
  147. if value.get("stacktrace") and value["stacktrace"].get("frames"):
  148. frames = value["stacktrace"]["frames"]
  149. # If in_app is always true, make it false ¯\_(ツ)_/¯
  150. if all(x.get("in_app") for x in frames):
  151. for frame in frames:
  152. frame["in_app"] = False
  153. return exception
  154. def generate_tags(self, data: Dict, tags: List[Tuple[str, str]] = []):
  155. """
  156. Determine tag relational data
  157. Optionally pass tags array for existing known tags to generate
  158. """
  159. for Processor in TAG_PROCESSORS:
  160. processor = Processor()
  161. value = processor.get_tag_values(data)
  162. if value:
  163. tags.append((processor.tag, value))
  164. if data.get("tags"):
  165. tags += [(k, v) for k, v in data["tags"].items()]
  166. return tags
  167. def annotate_contexts(self, event):
  168. """
  169. SDK events may contain contexts. This function adds additional contexts data
  170. """
  171. contexts = event.get("contexts")
  172. for Processor in EVENT_CONTEXT_PROCESSORS:
  173. processor = Processor()
  174. if contexts is None or not contexts.get(processor.name):
  175. processor_contexts = processor.get_context(event)
  176. if processor_contexts:
  177. if contexts is None:
  178. contexts = {}
  179. contexts[processor.name] = processor_contexts
  180. return contexts
  181. def get_message(self, data):
  182. """ Prefer message over logentry """
  183. if "message" in data:
  184. return data["message"]
  185. return data.get("logentry", {}).get("message", "")
  186. def get_environment(self, name: str, project):
  187. environment, _ = Environment.objects.get_or_create(
  188. name=name[: Environment._meta.get_field("name").max_length],
  189. organization=project.organization,
  190. )
  191. environment.projects.add(project)
  192. return environment
  193. def get_release(self, version: str, project):
  194. release, _ = Release.objects.get_or_create(
  195. version=version, organization=project.organization
  196. )
  197. release.projects.add(project)
  198. return release
  199. def create(self, data):
  200. project = self.context.get("project")
  201. eventtype = self.get_eventtype()
  202. metadata = eventtype.get_metadata(data)
  203. title = eventtype.get_title(metadata)
  204. culprit = eventtype.get_location(data)
  205. request = data.get("request")
  206. breadcrumbs = data.get("breadcrumbs")
  207. exception = data.get("exception")
  208. level = None
  209. if data.get("level"):
  210. level = LogLevel.from_string(data["level"])
  211. if (
  212. data.get("stacktrace")
  213. and exception
  214. and len(exception.get("values", 0)) > 0
  215. and not exception["values"][0].get("stacktrace")
  216. ):
  217. # stacktrace is deprecated, but supported at this time
  218. # Assume it's for the first exception value
  219. exception["values"][0]["stacktrace"] = data.get("stacktrace")
  220. exception = self.modify_exception(exception)
  221. if request:
  222. headers = request.get("headers")
  223. if headers:
  224. request["inferred_content_type"] = headers.get("Content-Type")
  225. sorted_headers = sorted([pair for pair in headers.items()])
  226. for idx, header in enumerate(sorted_headers):
  227. if isinstance(header[1], list):
  228. sorted_headers[idx] = (header[0], header[1][0])
  229. request["headers"] = sorted_headers
  230. contexts = self.annotate_contexts(data)
  231. data["contexts"] = contexts
  232. with transaction.atomic():
  233. if not project.first_event:
  234. project.first_event = data.get("timestamp")
  235. project.save(update_fields=["first_event"])
  236. defaults = {
  237. "metadata": sanitize_bad_postgres_json(metadata),
  238. }
  239. if level:
  240. defaults["level"] = level
  241. environment = None
  242. if data.get("environment"):
  243. environment = self.get_environment(data["environment"], project)
  244. release = None
  245. if data.get("release"):
  246. release = self.get_release(data["release"], project)
  247. tags = []
  248. if environment:
  249. tags.append(("environment", environment.name))
  250. if release:
  251. tags.append(("release", release.version))
  252. tags = self.generate_tags(data, tags)
  253. defaults["tags"] = {tag[0]: [tag[1]] for tag in tags}
  254. issue, issue_created = Issue.objects.get_or_create(
  255. title=sanitize_bad_postgres_chars(title),
  256. culprit=sanitize_bad_postgres_chars(culprit),
  257. project_id=project.id,
  258. type=self.type,
  259. defaults=defaults,
  260. )
  261. json_data = {
  262. "breadcrumbs": breadcrumbs,
  263. "contexts": contexts,
  264. "culprit": culprit,
  265. "exception": exception,
  266. "metadata": metadata,
  267. "message": self.get_message(data),
  268. "modules": data.get("modules"),
  269. "platform": data.get("platform", "other"),
  270. "request": request,
  271. "sdk": data.get("sdk"),
  272. "title": title,
  273. "type": self.type.label,
  274. }
  275. if environment:
  276. json_data["environment"] = environment.name
  277. extra = data.get("extra")
  278. if extra:
  279. json_data["extra"] = extra
  280. user = self.process_user(project, data)
  281. if user:
  282. json_data["user"] = user
  283. errors = None
  284. handled_errors = self.context.get("handled_errors")
  285. if handled_errors:
  286. errors = []
  287. for field_name, field_errors in handled_errors.items():
  288. for error in field_errors:
  289. errors.append(
  290. {
  291. "reason": str(error),
  292. "type": error.code,
  293. "name": field_name,
  294. "value": error.value,
  295. }
  296. )
  297. params = {
  298. "event_id": data["event_id"],
  299. "issue": issue,
  300. "tags": {tag[0]: tag[1] for tag in tags},
  301. "errors": errors,
  302. "timestamp": data.get("timestamp"),
  303. "data": sanitize_bad_postgres_json(json_data),
  304. "release": release,
  305. }
  306. if level:
  307. params["level"] = level
  308. try:
  309. event = Event.objects.create(**params)
  310. except IntegrityError as e:
  311. # This except is more efficient than a query for exists().
  312. if e.args and "event_id" in e.args[0]:
  313. raise PermissionDenied(
  314. "An event with the same ID already exists (%s)"
  315. % params["event_id"]
  316. ) from e
  317. raise e
  318. issue.check_for_status_update()
  319. update_search_index_issue(args=[issue.pk, issue_created], countdown=10)
  320. return event
  321. class StoreErrorSerializer(StoreDefaultSerializer):
  322. """ Primary difference is the presense of exception attribute """
  323. type = EventType.ERROR
  324. exception = serializers.JSONField(required=False)
  325. stacktrace = serializers.JSONField(
  326. required=False, help_text="Deprecated but supported at this time"
  327. )
  328. class StoreCSPReportSerializer(BaseSerializer):
  329. """
  330. CSP Report Serializer
  331. Very different format from others Store serializers.
  332. Does not extend base class due to differences.
  333. """
  334. type = EventType.CSP
  335. def __init__(self, *args, **kwargs):
  336. super().__init__(*args, **kwargs)
  337. # This is done to support the hyphen
  338. self.fields.update({"csp-report": serializers.JSONField()})
  339. def create(self, data):
  340. project = self.context.get("project")
  341. csp = data["csp-report"]
  342. title = self.get_title(csp)
  343. culprit = self.get_culprit(csp)
  344. uri = self.get_uri(csp)
  345. directive = self.get_effective_directive(csp)
  346. metadata = {
  347. "message": title,
  348. "uri": uri,
  349. "directive": directive,
  350. }
  351. issue, _ = Issue.objects.get_or_create(
  352. title=title,
  353. culprit=culprit,
  354. project_id=project.id,
  355. type=EventType.CSP,
  356. defaults={"metadata": metadata},
  357. )
  358. # Convert - to _
  359. normalized_csp = dict((k.replace("-", "_"), v) for k, v in csp.items())
  360. if "effective_directive" not in normalized_csp:
  361. normalized_csp["effective_directive"] = directive
  362. json_data = {
  363. "culprit": culprit,
  364. "csp": normalized_csp,
  365. "title": title,
  366. "metadata": metadata,
  367. "message": title,
  368. "type": EventType.CSP.label,
  369. }
  370. user = self.process_user(project, data)
  371. if user:
  372. json_data["user"] = user
  373. params = {
  374. "issue": issue,
  375. "data": json_data,
  376. }
  377. return Event.objects.create(**params)
  378. def get_effective_directive(self, data):
  379. """
  380. Some browers return effective-directive and others don't.
  381. Infer missing ones from violated directive
  382. """
  383. if "effective-directive" in data:
  384. return data["effective-directive"]
  385. first_violation = data["violated-directive"].split()[0]
  386. return first_violation
  387. def get_uri(self, data):
  388. url = data["blocked-uri"]
  389. return urlparse(url).netloc
  390. def get_title(self, data):
  391. effective_directive = self.get_effective_directive(data)
  392. humanized_directive = effective_directive.replace("-src", "")
  393. uri = self.get_uri(data)
  394. return f"Blocked '{humanized_directive}' from '{uri}'"
  395. def get_culprit(self, data):
  396. # "style-src cdn.example.com"
  397. return data.get("violated-directive")
  398. class EnvelopeHeaderSerializer(serializers.Serializer):
  399. event_id = serializers.UUIDField(required=False)
  400. sent_at = FlexibleDateTimeField(required=False)