# serializers.py (16 KB)

  1. from typing import Dict, List, Tuple, Union
  2. from urllib.parse import urlparse
  3. from django.db import transaction
  4. from django.db.utils import IntegrityError
  5. from ipware import get_client_ip
  6. from anonymizeip import anonymize_ip
  7. from rest_framework import serializers
  8. from rest_framework.exceptions import PermissionDenied
  9. from sentry.eventtypes.error import ErrorEvent
  10. from sentry.eventtypes.base import DefaultEvent
  11. from issues.models import EventType, Issue
  12. from issues.serializers import BaseBreadcrumbsSerializer
  13. from environments.models import Environment
  14. from releases.models import Release
  15. from glitchtip.serializers import FlexibleDateTimeField
  16. from .models import Event, LogLevel
  17. from .fields import (
  18. GenericField,
  19. ForgivingHStoreField,
  20. ForgivingDisallowRegexField,
  21. QueryStringField,
  22. )
  23. from .event_tag_processors import TAG_PROCESSORS
  24. from .event_context_processors import EVENT_CONTEXT_PROCESSORS
  25. def replace(data: Union[str, dict, list], match: str, repl: str):
  26. """ A recursive replace function """
  27. if isinstance(data, dict):
  28. return {k: replace(v, match, repl) for k, v in data.items()}
  29. elif isinstance(data, list):
  30. return [replace(i, match, repl) for i in data]
  31. elif isinstance(data, str):
  32. return data.replace(match, repl)
  33. return data
  34. def sanitize_bad_postgres_chars(data: Union[str, dict, list]):
  35. """
  36. Remove values which are not supported by the postgres string data types
  37. """
  38. known_bads = ["\x00"]
  39. for known_bad in known_bads:
  40. data = data.replace(known_bad, " ")
  41. return data
  42. def sanitize_bad_postgres_json(data: Union[str, dict, list]):
  43. """
  44. Remove values which are not supported by the postgres JSONB data type
  45. """
  46. known_bads = ["\u0000"]
  47. for known_bad in known_bads:
  48. data = replace(data, known_bad, " ")
  49. return data
  50. class RequestSerializer(serializers.Serializer):
  51. env = serializers.DictField(
  52. child=serializers.CharField(allow_blank=True, allow_null=True), required=False
  53. )
  54. # Dict values can be both str and List[str]
  55. headers = serializers.DictField(required=False)
  56. url = serializers.CharField(required=False, allow_blank=True)
  57. method = serializers.CharField(required=False, allow_blank=True)
  58. query_string = QueryStringField(required=False, allow_null=True)
  59. class BreadcrumbsSerializer(BaseBreadcrumbsSerializer):
  60. timestamp = GenericField(required=False)
  61. def validate_level(self, value):
  62. if value == "log":
  63. return "info"
  64. return value
  65. class BaseSerializer(serializers.Serializer):
  66. def process_user(self, project, data):
  67. """ Fetch user data from SDK event and request """
  68. user = data.get("user", {})
  69. if self.context and self.context.get("request"):
  70. client_ip, is_routable = get_client_ip(self.context["request"])
  71. if user or is_routable:
  72. if is_routable:
  73. if project.should_scrub_ip_addresses:
  74. client_ip = anonymize_ip(client_ip)
  75. user["ip_address"] = client_ip
  76. return user
  77. class SentrySDKEventSerializer(BaseSerializer):
  78. """ Represents events coming from a OSS sentry SDK client """
  79. breadcrumbs = serializers.JSONField(required=False)
  80. tags = ForgivingHStoreField(required=False)
  81. event_id = serializers.UUIDField()
  82. extra = serializers.JSONField(required=False)
  83. request = RequestSerializer(required=False)
  84. server_name = serializers.CharField(required=False)
  85. sdk = serializers.JSONField(required=False)
  86. platform = serializers.CharField(required=False)
  87. release = serializers.CharField(required=False, allow_null=True)
  88. environment = ForgivingDisallowRegexField(
  89. required=False, allow_null=True, disallow_regex=r"^[^\n\r\f\/]*$"
  90. )
  91. _meta = serializers.JSONField(required=False)
  92. class FormattedMessageSerializer(serializers.Serializer):
  93. formatted = serializers.CharField(
  94. required=False
  95. ) # Documented as required, but some Sentry SDKs don't send it
  96. messages = serializers.CharField(required=False)
  97. params = serializers.ListField(child=serializers.CharField(), required=False)
  98. def to_internal_value(self, data):
  99. value = super().to_internal_value(data)
  100. return value.get("formatted", "")
  101. class MessageField(serializers.CharField):
  102. def to_internal_value(self, data):
  103. if isinstance(data, dict):
  104. serializer = FormattedMessageSerializer(data=data)
  105. serializer.is_valid(raise_exception=True)
  106. return serializer.validated_data
  107. return super().to_internal_value(data)
  108. class StoreDefaultSerializer(SentrySDKEventSerializer):
  109. """
  110. Default serializer. Used as both a base class and for default error types
  111. """
  112. type = EventType.DEFAULT
  113. contexts = serializers.JSONField(required=False)
  114. level = serializers.CharField(required=False)
  115. logentry = serializers.JSONField(required=False)
  116. message = MessageField(required=False, allow_blank=True, allow_null=True)
  117. timestamp = FlexibleDateTimeField(required=False)
  118. transaction = serializers.CharField(
  119. required=False, allow_null=True, allow_blank=True
  120. )
  121. user = serializers.JSONField(required=False)
  122. modules = serializers.JSONField(required=False)
  123. def validate_breadcrumbs(self, value):
  124. """
  125. Normalize breadcrumbs, which may come in as dict or list
  126. """
  127. if isinstance(value, list):
  128. value = {"values": value}
  129. if value.get("values") == []:
  130. return None
  131. serializer = BreadcrumbsSerializer(data=value.get("values"), many=True)
  132. if serializer.is_valid():
  133. return {"values": serializer.validated_data}
  134. return value
  135. def get_eventtype(self):
  136. """ Get event type class from self.type """
  137. if self.type is EventType.DEFAULT:
  138. return DefaultEvent()
  139. if self.type is EventType.ERROR:
  140. return ErrorEvent()
  141. def modify_exception(self, exception):
  142. """ OSS Sentry does this, I have no idea why """
  143. if exception:
  144. for value in exception.get("values", []):
  145. value.pop("module", None)
  146. if value.get("stacktrace") and value["stacktrace"].get("frames"):
  147. frames = value["stacktrace"]["frames"]
  148. # If in_app is always true, make it false ¯\_(ツ)_/¯
  149. if all(x.get("in_app") for x in frames):
  150. for frame in frames:
  151. frame["in_app"] = False
  152. return exception
  153. def generate_tags(self, data: Dict, tags: List[Tuple[str, str]] = []):
  154. """
  155. Determine tag relational data
  156. Optionally pass tags array for existing known tags to generate
  157. """
  158. for Processor in TAG_PROCESSORS:
  159. processor = Processor()
  160. value = processor.get_tag_values(data)
  161. if value:
  162. tags.append((processor.tag, value))
  163. if data.get("tags"):
  164. tags += [(k, v) for k, v in data["tags"].items()]
  165. return tags
  166. def annotate_contexts(self, event):
  167. """
  168. SDK events may contain contexts. This function adds additional contexts data
  169. """
  170. contexts = event.get("contexts")
  171. for Processor in EVENT_CONTEXT_PROCESSORS:
  172. processor = Processor()
  173. if contexts is None or not contexts.get(processor.name):
  174. processor_contexts = processor.get_context(event)
  175. if processor_contexts:
  176. if contexts is None:
  177. contexts = {}
  178. contexts[processor.name] = processor_contexts
  179. return contexts
  180. def get_message(self, data):
  181. """ Prefer message over logentry """
  182. if "message" in data:
  183. return data["message"]
  184. return data.get("logentry", {}).get("message", "")
  185. def get_environment(self, name: str, project):
  186. environment, _ = Environment.objects.get_or_create(
  187. name=name[: Environment._meta.get_field("name").max_length],
  188. organization=project.organization,
  189. )
  190. environment.projects.add(project)
  191. return environment
  192. def get_release(self, version: str, project):
  193. release, _ = Release.objects.get_or_create(
  194. version=version, organization=project.organization
  195. )
  196. release.projects.add(project)
  197. return release
  198. def create(self, data):
  199. project = self.context.get("project")
  200. eventtype = self.get_eventtype()
  201. metadata = eventtype.get_metadata(data)
  202. title = eventtype.get_title(metadata)
  203. culprit = eventtype.get_location(data)
  204. request = data.get("request")
  205. breadcrumbs = data.get("breadcrumbs")
  206. exception = data.get("exception")
  207. level = None
  208. if data.get("level"):
  209. level = LogLevel.from_string(data["level"])
  210. if (
  211. data.get("stacktrace")
  212. and exception
  213. and len(exception.get("values", 0)) > 0
  214. and not exception["values"][0].get("stacktrace")
  215. ):
  216. # stacktrace is deprecated, but supported at this time
  217. # Assume it's for the first exception value
  218. exception["values"][0]["stacktrace"] = data.get("stacktrace")
  219. exception = self.modify_exception(exception)
  220. if request:
  221. headers = request.get("headers")
  222. if headers:
  223. request["inferred_content_type"] = headers.get("Content-Type")
  224. sorted_headers = sorted([pair for pair in headers.items()])
  225. for idx, header in enumerate(sorted_headers):
  226. if isinstance(header[1], list):
  227. sorted_headers[idx] = (header[0], header[1][0])
  228. request["headers"] = sorted_headers
  229. contexts = self.annotate_contexts(data)
  230. data["contexts"] = contexts
  231. with transaction.atomic():
  232. if not project.first_event:
  233. project.first_event = data.get("timestamp")
  234. project.save(update_fields=["first_event"])
  235. defaults = {"metadata": sanitize_bad_postgres_json(metadata)}
  236. if level:
  237. defaults["level"] = level
  238. issue, _ = Issue.objects.get_or_create(
  239. title=sanitize_bad_postgres_chars(title),
  240. culprit=sanitize_bad_postgres_chars(culprit),
  241. project_id=project.id,
  242. type=self.type,
  243. defaults=defaults,
  244. )
  245. environment = None
  246. if data.get("environment"):
  247. environment = self.get_environment(data["environment"], project)
  248. release = None
  249. if data.get("release"):
  250. release = self.get_release(data["release"], project)
  251. tags = []
  252. if environment:
  253. tags.append(("environment", environment.name))
  254. if release:
  255. tags.append(("release", release.version))
  256. tags = self.generate_tags(data, tags)
  257. tags = {tag[0]: tag[1] for tag in tags}
  258. json_data = {
  259. "breadcrumbs": breadcrumbs,
  260. "contexts": contexts,
  261. "culprit": culprit,
  262. "exception": exception,
  263. "metadata": metadata,
  264. "message": self.get_message(data),
  265. "modules": data.get("modules"),
  266. "platform": data.get("platform", "other"),
  267. "request": request,
  268. "sdk": data.get("sdk"),
  269. "title": title,
  270. "type": self.type.label,
  271. }
  272. if environment:
  273. json_data["environment"] = environment.name
  274. extra = data.get("extra")
  275. if extra:
  276. json_data["extra"] = extra
  277. user = self.process_user(project, data)
  278. if user:
  279. json_data["user"] = user
  280. errors = None
  281. handled_errors = self.context.get("handled_errors")
  282. if handled_errors:
  283. errors = []
  284. for field_name, field_errors in handled_errors.items():
  285. for error in field_errors:
  286. errors.append(
  287. {
  288. "reason": str(error),
  289. "type": error.code,
  290. "name": field_name,
  291. "value": error.value,
  292. }
  293. )
  294. params = {
  295. "event_id": data["event_id"],
  296. "issue": issue,
  297. "tags": tags,
  298. "errors": errors,
  299. "timestamp": data.get("timestamp"),
  300. "data": sanitize_bad_postgres_json(json_data),
  301. "release": release,
  302. }
  303. if level:
  304. params["level"] = level
  305. try:
  306. event = Event.objects.create(**params)
  307. except IntegrityError as e:
  308. # This except is more efficient than a query for exists().
  309. if e.args and "event_id" in e.args[0]:
  310. raise PermissionDenied(
  311. "An event with the same ID already exists (%s)"
  312. % params["event_id"]
  313. ) from e
  314. raise e
  315. issue.check_for_status_update()
  316. return event
  317. class StoreErrorSerializer(StoreDefaultSerializer):
  318. """ Primary difference is the presense of exception attribute """
  319. type = EventType.ERROR
  320. exception = serializers.JSONField(required=False)
  321. stacktrace = serializers.JSONField(
  322. required=False, help_text="Deprecated but supported at this time"
  323. )
  324. class StoreCSPReportSerializer(BaseSerializer):
  325. """
  326. CSP Report Serializer
  327. Very different format from others Store serializers.
  328. Does not extend base class due to differences.
  329. """
  330. type = EventType.CSP
  331. def __init__(self, *args, **kwargs):
  332. super().__init__(*args, **kwargs)
  333. # This is done to support the hyphen
  334. self.fields.update({"csp-report": serializers.JSONField()})
  335. def create(self, data):
  336. project = self.context.get("project")
  337. csp = data["csp-report"]
  338. title = self.get_title(csp)
  339. culprit = self.get_culprit(csp)
  340. uri = self.get_uri(csp)
  341. directive = self.get_effective_directive(csp)
  342. metadata = {
  343. "message": title,
  344. "uri": uri,
  345. "directive": directive,
  346. }
  347. issue, _ = Issue.objects.get_or_create(
  348. title=title,
  349. culprit=culprit,
  350. project_id=project.id,
  351. type=EventType.CSP,
  352. defaults={"metadata": metadata},
  353. )
  354. # Convert - to _
  355. normalized_csp = dict((k.replace("-", "_"), v) for k, v in csp.items())
  356. if "effective_directive" not in normalized_csp:
  357. normalized_csp["effective_directive"] = directive
  358. json_data = {
  359. "culprit": culprit,
  360. "csp": normalized_csp,
  361. "title": title,
  362. "metadata": metadata,
  363. "message": title,
  364. "type": EventType.CSP.label,
  365. }
  366. user = self.process_user(project, data)
  367. if user:
  368. json_data["user"] = user
  369. params = {
  370. "issue": issue,
  371. "data": json_data,
  372. }
  373. return Event.objects.create(**params)
  374. def get_effective_directive(self, data):
  375. """
  376. Some browers return effective-directive and others don't.
  377. Infer missing ones from violated directive
  378. """
  379. if "effective-directive" in data:
  380. return data["effective-directive"]
  381. first_violation = data["violated-directive"].split()[0]
  382. return first_violation
  383. def get_uri(self, data):
  384. url = data["blocked-uri"]
  385. return urlparse(url).netloc
  386. def get_title(self, data):
  387. effective_directive = self.get_effective_directive(data)
  388. humanized_directive = effective_directive.replace("-src", "")
  389. uri = self.get_uri(data)
  390. return f"Blocked '{humanized_directive}' from '{uri}'"
  391. def get_culprit(self, data):
  392. # "style-src cdn.example.com"
  393. return data.get("violated-directive")
  394. class EnvelopeHeaderSerializer(serializers.Serializer):
  395. event_id = serializers.UUIDField(required=False)
  396. sent_at = FlexibleDateTimeField(required=False)