serializers.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476
  1. from typing import Dict, List, Tuple, Union
  2. from urllib.parse import urlparse
  3. from django.db import transaction
  4. from django.db.utils import IntegrityError
  5. from ipware import get_client_ip
  6. from anonymizeip import anonymize_ip
  7. from rest_framework import serializers
  8. from rest_framework.exceptions import PermissionDenied
  9. from sentry.eventtypes.error import ErrorEvent
  10. from sentry.eventtypes.base import DefaultEvent
  11. from issues.models import EventType, Issue
  12. from issues.serializers import BaseBreadcrumbsSerializer
  13. from issues.tasks import update_search_index_issue
  14. from environments.models import Environment
  15. from releases.models import Release
  16. from glitchtip.serializers import FlexibleDateTimeField
  17. from .models import Event, LogLevel
  18. from .fields import (
  19. GenericField,
  20. ForgivingHStoreField,
  21. ForgivingDisallowRegexField,
  22. QueryStringField,
  23. )
  24. from .event_tag_processors import TAG_PROCESSORS
  25. from .event_context_processors import EVENT_CONTEXT_PROCESSORS
  26. def replace(data: Union[str, dict, list], match: str, repl: str):
  27. """ A recursive replace function """
  28. if isinstance(data, dict):
  29. return {k: replace(v, match, repl) for k, v in data.items()}
  30. elif isinstance(data, list):
  31. return [replace(i, match, repl) for i in data]
  32. elif isinstance(data, str):
  33. return data.replace(match, repl)
  34. return data
  35. def sanitize_bad_postgres_chars(data: Union[str, dict, list]):
  36. """
  37. Remove values which are not supported by the postgres string data types
  38. """
  39. known_bads = ["\x00"]
  40. for known_bad in known_bads:
  41. data = data.replace(known_bad, " ")
  42. return data
  43. def sanitize_bad_postgres_json(data: Union[str, dict, list]):
  44. """
  45. Remove values which are not supported by the postgres JSONB data type
  46. """
  47. known_bads = ["\u0000"]
  48. for known_bad in known_bads:
  49. data = replace(data, known_bad, " ")
  50. return data
  51. class RequestSerializer(serializers.Serializer):
  52. env = serializers.DictField(
  53. child=serializers.CharField(allow_blank=True, allow_null=True), required=False
  54. )
  55. # Dict values can be both str and List[str]
  56. headers = serializers.DictField(required=False)
  57. url = serializers.CharField(required=False, allow_blank=True)
  58. method = serializers.CharField(required=False, allow_blank=True)
  59. query_string = QueryStringField(required=False, allow_null=True)
  60. class BreadcrumbsSerializer(BaseBreadcrumbsSerializer):
  61. timestamp = GenericField(required=False)
  62. def validate_level(self, value):
  63. if value == "log":
  64. return "info"
  65. return value
  66. class BaseSerializer(serializers.Serializer):
  67. def process_user(self, project, data):
  68. """ Fetch user data from SDK event and request """
  69. user = data.get("user", {})
  70. if self.context and self.context.get("request"):
  71. client_ip, is_routable = get_client_ip(self.context["request"])
  72. if user or is_routable:
  73. if is_routable:
  74. if project.should_scrub_ip_addresses:
  75. client_ip = anonymize_ip(client_ip)
  76. user["ip_address"] = client_ip
  77. return user
  78. class SentrySDKEventSerializer(BaseSerializer):
  79. """ Represents events coming from a OSS sentry SDK client """
  80. breadcrumbs = serializers.JSONField(required=False)
  81. tags = ForgivingHStoreField(required=False)
  82. event_id = serializers.UUIDField()
  83. extra = serializers.JSONField(required=False)
  84. request = RequestSerializer(required=False)
  85. server_name = serializers.CharField(required=False)
  86. sdk = serializers.JSONField(required=False)
  87. platform = serializers.CharField(required=False)
  88. release = serializers.CharField(required=False, allow_null=True)
  89. environment = ForgivingDisallowRegexField(
  90. required=False, allow_null=True, disallow_regex=r"^[^\n\r\f\/]*$"
  91. )
  92. _meta = serializers.JSONField(required=False)
  93. class FormattedMessageSerializer(serializers.Serializer):
  94. formatted = serializers.CharField(
  95. required=False
  96. ) # Documented as required, but some Sentry SDKs don't send it
  97. messages = serializers.CharField(required=False)
  98. params = serializers.ListField(child=serializers.CharField(), required=False)
  99. def to_internal_value(self, data):
  100. value = super().to_internal_value(data)
  101. return value.get("formatted", "")
  102. class MessageField(serializers.CharField):
  103. def to_internal_value(self, data):
  104. if isinstance(data, dict):
  105. serializer = FormattedMessageSerializer(data=data)
  106. serializer.is_valid(raise_exception=True)
  107. return serializer.validated_data
  108. return super().to_internal_value(data)
  109. class LogEntrySerializer(serializers.Serializer):
  110. formatted = serializers.CharField(required=False)
  111. message = serializers.CharField(required=False)
  112. params = serializers.JSONField(required=False)
  113. def validate(self, attrs):
  114. data = super().validate(attrs)
  115. if not data.get("formatted") and data.get("params"):
  116. params = data["params"]
  117. if isinstance(params, list):
  118. data["formatted"] = data["message"] % tuple(data["params"])
  119. return data
  120. class StoreDefaultSerializer(SentrySDKEventSerializer):
  121. """
  122. Default serializer. Used as both a base class and for default error types
  123. """
  124. type = EventType.DEFAULT
  125. contexts = serializers.JSONField(required=False)
  126. level = serializers.CharField(required=False)
  127. logentry = LogEntrySerializer(required=False)
  128. message = MessageField(required=False, allow_blank=True, allow_null=True)
  129. timestamp = FlexibleDateTimeField(required=False)
  130. transaction = serializers.CharField(
  131. required=False, allow_null=True, allow_blank=True
  132. )
  133. user = serializers.JSONField(required=False)
  134. modules = serializers.JSONField(required=False)
  135. def validate_breadcrumbs(self, value):
  136. """
  137. Normalize breadcrumbs, which may come in as dict or list
  138. """
  139. if isinstance(value, list):
  140. value = {"values": value}
  141. if value.get("values") == []:
  142. return None
  143. serializer = BreadcrumbsSerializer(data=value.get("values"), many=True)
  144. if serializer.is_valid():
  145. return {"values": serializer.validated_data}
  146. return value
  147. def get_eventtype(self):
  148. """ Get event type class from self.type """
  149. if self.type is EventType.DEFAULT:
  150. return DefaultEvent()
  151. if self.type is EventType.ERROR:
  152. return ErrorEvent()
  153. def modify_exception(self, exception):
  154. """ OSS Sentry does this, I have no idea why """
  155. if exception:
  156. for value in exception.get("values", []):
  157. value.pop("module", None)
  158. if value.get("stacktrace") and value["stacktrace"].get("frames"):
  159. frames = value["stacktrace"]["frames"]
  160. # If in_app is always true, make it false ¯\_(ツ)_/¯
  161. if all(x.get("in_app") for x in frames):
  162. for frame in frames:
  163. frame["in_app"] = False
  164. return exception
  165. def generate_tags(self, data: Dict, tags: List[Tuple[str, str]] = []):
  166. """
  167. Determine tag relational data
  168. Optionally pass tags array for existing known tags to generate
  169. """
  170. for Processor in TAG_PROCESSORS:
  171. processor = Processor()
  172. value = processor.get_tag_values(data)
  173. if value:
  174. tags.append((processor.tag, value))
  175. if data.get("tags"):
  176. tags += [(k, v) for k, v in data["tags"].items()]
  177. return tags
  178. def annotate_contexts(self, event):
  179. """
  180. SDK events may contain contexts. This function adds additional contexts data
  181. """
  182. contexts = event.get("contexts")
  183. for Processor in EVENT_CONTEXT_PROCESSORS:
  184. processor = Processor()
  185. if contexts is None or not contexts.get(processor.name):
  186. processor_contexts = processor.get_context(event)
  187. if processor_contexts:
  188. if contexts is None:
  189. contexts = {}
  190. contexts[processor.name] = processor_contexts
  191. return contexts
  192. def get_message(self, data):
  193. """ Prefer message over logentry """
  194. if "message" in data:
  195. return data["message"]
  196. return data.get("logentry", {}).get("message", "")
  197. def get_environment(self, name: str, project):
  198. environment, _ = Environment.objects.get_or_create(
  199. name=name[: Environment._meta.get_field("name").max_length],
  200. organization=project.organization,
  201. )
  202. environment.projects.add(project)
  203. return environment
  204. def get_release(self, version: str, project):
  205. release, _ = Release.objects.get_or_create(
  206. version=version, organization=project.organization
  207. )
  208. release.projects.add(project)
  209. return release
  210. def create(self, data):
  211. project = self.context.get("project")
  212. eventtype = self.get_eventtype()
  213. metadata = eventtype.get_metadata(data)
  214. title = eventtype.get_title(metadata)
  215. culprit = eventtype.get_location(data)
  216. request = data.get("request")
  217. breadcrumbs = data.get("breadcrumbs")
  218. exception = data.get("exception")
  219. level = None
  220. if data.get("level"):
  221. level = LogLevel.from_string(data["level"])
  222. if (
  223. data.get("stacktrace")
  224. and exception
  225. and len(exception.get("values", 0)) > 0
  226. and not exception["values"][0].get("stacktrace")
  227. ):
  228. # stacktrace is deprecated, but supported at this time
  229. # Assume it's for the first exception value
  230. exception["values"][0]["stacktrace"] = data.get("stacktrace")
  231. exception = self.modify_exception(exception)
  232. if request:
  233. headers = request.get("headers")
  234. if headers:
  235. request["inferred_content_type"] = headers.get("Content-Type")
  236. sorted_headers = sorted([pair for pair in headers.items()])
  237. for idx, header in enumerate(sorted_headers):
  238. if isinstance(header[1], list):
  239. sorted_headers[idx] = (header[0], header[1][0])
  240. request["headers"] = sorted_headers
  241. contexts = self.annotate_contexts(data)
  242. data["contexts"] = contexts
  243. with transaction.atomic():
  244. if not project.first_event:
  245. project.first_event = data.get("timestamp")
  246. project.save(update_fields=["first_event"])
  247. defaults = {
  248. "metadata": sanitize_bad_postgres_json(metadata),
  249. }
  250. if level:
  251. defaults["level"] = level
  252. environment = None
  253. if data.get("environment"):
  254. environment = self.get_environment(data["environment"], project)
  255. release = None
  256. if data.get("release"):
  257. release = self.get_release(data["release"], project)
  258. tags = []
  259. if environment:
  260. tags.append(("environment", environment.name))
  261. if release:
  262. tags.append(("release", release.version))
  263. tags = self.generate_tags(data, tags)
  264. defaults["tags"] = {tag[0]: [tag[1]] for tag in tags}
  265. issue, issue_created = Issue.objects.get_or_create(
  266. title=sanitize_bad_postgres_chars(title),
  267. culprit=sanitize_bad_postgres_chars(culprit),
  268. project_id=project.id,
  269. type=self.type,
  270. defaults=defaults,
  271. )
  272. json_data = {
  273. "breadcrumbs": breadcrumbs,
  274. "contexts": contexts,
  275. "culprit": culprit,
  276. "exception": exception,
  277. "metadata": metadata,
  278. "message": self.get_message(data),
  279. "modules": data.get("modules"),
  280. "platform": data.get("platform", "other"),
  281. "request": request,
  282. "sdk": data.get("sdk"),
  283. "title": title,
  284. "type": self.type.label,
  285. }
  286. if environment:
  287. json_data["environment"] = environment.name
  288. if data.get("logentry"):
  289. json_data["logentry"] = data.get("logentry")
  290. extra = data.get("extra")
  291. if extra:
  292. json_data["extra"] = extra
  293. user = self.process_user(project, data)
  294. if user:
  295. json_data["user"] = user
  296. errors = None
  297. handled_errors = self.context.get("handled_errors")
  298. if handled_errors:
  299. errors = []
  300. for field_name, field_errors in handled_errors.items():
  301. for error in field_errors:
  302. errors.append(
  303. {
  304. "reason": str(error),
  305. "type": error.code,
  306. "name": field_name,
  307. "value": error.value,
  308. }
  309. )
  310. params = {
  311. "event_id": data["event_id"],
  312. "issue": issue,
  313. "tags": {tag[0]: tag[1] for tag in tags},
  314. "errors": errors,
  315. "timestamp": data.get("timestamp"),
  316. "data": sanitize_bad_postgres_json(json_data),
  317. "release": release,
  318. }
  319. if level:
  320. params["level"] = level
  321. try:
  322. event = Event.objects.create(**params)
  323. except IntegrityError as e:
  324. # This except is more efficient than a query for exists().
  325. if e.args and "event_id" in e.args[0]:
  326. raise PermissionDenied(
  327. "An event with the same ID already exists (%s)"
  328. % params["event_id"]
  329. ) from e
  330. raise e
  331. issue.check_for_status_update()
  332. update_search_index_issue(args=[issue.pk, issue_created], countdown=10)
  333. return event
  334. class StoreErrorSerializer(StoreDefaultSerializer):
  335. """ Primary difference is the presense of exception attribute """
  336. type = EventType.ERROR
  337. exception = serializers.JSONField(required=False)
  338. stacktrace = serializers.JSONField(
  339. required=False, help_text="Deprecated but supported at this time"
  340. )
  341. class StoreCSPReportSerializer(BaseSerializer):
  342. """
  343. CSP Report Serializer
  344. Very different format from others Store serializers.
  345. Does not extend base class due to differences.
  346. """
  347. type = EventType.CSP
  348. def __init__(self, *args, **kwargs):
  349. super().__init__(*args, **kwargs)
  350. # This is done to support the hyphen
  351. self.fields.update({"csp-report": serializers.JSONField()})
  352. def create(self, data):
  353. project = self.context.get("project")
  354. csp = data["csp-report"]
  355. title = self.get_title(csp)
  356. culprit = self.get_culprit(csp)
  357. uri = self.get_uri(csp)
  358. directive = self.get_effective_directive(csp)
  359. metadata = {
  360. "message": title,
  361. "uri": uri,
  362. "directive": directive,
  363. }
  364. issue, _ = Issue.objects.get_or_create(
  365. title=title,
  366. culprit=culprit,
  367. project_id=project.id,
  368. type=EventType.CSP,
  369. defaults={"metadata": metadata},
  370. )
  371. # Convert - to _
  372. normalized_csp = dict((k.replace("-", "_"), v) for k, v in csp.items())
  373. if "effective_directive" not in normalized_csp:
  374. normalized_csp["effective_directive"] = directive
  375. json_data = {
  376. "culprit": culprit,
  377. "csp": normalized_csp,
  378. "title": title,
  379. "metadata": metadata,
  380. "message": title,
  381. "type": EventType.CSP.label,
  382. }
  383. user = self.process_user(project, data)
  384. if user:
  385. json_data["user"] = user
  386. params = {
  387. "issue": issue,
  388. "data": json_data,
  389. }
  390. return Event.objects.create(**params)
  391. def get_effective_directive(self, data):
  392. """
  393. Some browers return effective-directive and others don't.
  394. Infer missing ones from violated directive
  395. """
  396. if "effective-directive" in data:
  397. return data["effective-directive"]
  398. first_violation = data["violated-directive"].split()[0]
  399. return first_violation
  400. def get_uri(self, data):
  401. url = data["blocked-uri"]
  402. return urlparse(url).netloc
  403. def get_title(self, data):
  404. effective_directive = self.get_effective_directive(data)
  405. humanized_directive = effective_directive.replace("-src", "")
  406. uri = self.get_uri(data)
  407. return f"Blocked '{humanized_directive}' from '{uri}'"
  408. def get_culprit(self, data):
  409. # "style-src cdn.example.com"
  410. return data.get("violated-directive")
  411. class EnvelopeHeaderSerializer(serializers.Serializer):
  412. event_id = serializers.UUIDField(required=False)
  413. sent_at = FlexibleDateTimeField(required=False)