serializers.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440
  1. from typing import Dict, List, Tuple, Union
  2. from urllib.parse import urlparse
  3. from django.db import transaction
  4. from django.db.utils import IntegrityError
  5. from ipware import get_client_ip
  6. from anonymizeip import anonymize_ip
  7. from rest_framework import serializers
  8. from rest_framework.exceptions import PermissionDenied
  9. from sentry.eventtypes.error import ErrorEvent
  10. from sentry.eventtypes.base import DefaultEvent
  11. from issues.models import EventType, Issue
  12. from issues.serializers import BaseBreadcrumbsSerializer
  13. from environments.models import Environment
  14. from releases.models import Release
  15. from glitchtip.serializers import FlexibleDateTimeField
  16. from .models import Event, LogLevel
  17. from .fields import GenericField, ForgivingHStoreField, ForgivingDisallowRegexField
  18. from .event_tag_processors import TAG_PROCESSORS
  19. from .event_context_processors import EVENT_CONTEXT_PROCESSORS
  def replace(data: Union[str, dict, list], match: str, repl: str):
      """
      Recursively substitute ``match`` with ``repl`` in every string of ``data``.

      Strings go through ``str.replace``. Lists are rebuilt element by element
      and dicts are rebuilt with the same keys and processed values. Any other
      kind of value is handed back untouched.
      """
      if isinstance(data, str):
          return data.replace(match, repl)
      if isinstance(data, list):
          return [replace(item, match, repl) for item in data]
      if isinstance(data, dict):
          return {key: replace(value, match, repl) for key, value in data.items()}
      return data
  def sanitize_bad_postgres_chars(data: Union[str, dict, list]):
      """
      Remove values which are not supported by the postgres string data types

      Strings get every NUL character replaced by a single space. Dicts and
      lists, which the signature allows, are rebuilt with their values
      sanitized the same way (dict keys are left untouched). Any other value,
      such as None or a number, is returned unchanged instead of raising
      AttributeError on the missing ``replace`` method.
      """
      known_bads = ["\x00"]
      if isinstance(data, str):
          for known_bad in known_bads:
              data = data.replace(known_bad, " ")
          return data
      if isinstance(data, dict):
          return {k: sanitize_bad_postgres_chars(v) for k, v in data.items()}
      if isinstance(data, list):
          return [sanitize_bad_postgres_chars(item) for item in data]
      # Nothing to sanitize in non-string scalars
      return data
  def sanitize_bad_postgres_json(data: Union[str, dict, list]):
      """
      Remove values which are not supported by the postgres JSONB data type

      Each unsupported sequence is swapped for a single space through the
      recursive ``replace`` helper, so nested dicts and lists are covered.
      """
      cleaned = data
      for bad_sequence in ("\u0000",):
          cleaned = replace(cleaned, bad_sequence, " ")
      return cleaned
  class RequestSerializer(serializers.Serializer):
      """
      Validates the ``request`` section of an incoming event.

      Every field is optional.
      """

      # Mapping whose values are strings; blank and null values are accepted
      env = serializers.DictField(
          required=False,
          child=serializers.CharField(allow_null=True, allow_blank=True),
      )
      # Dict values can be both str and List[str], so no child field is declared
      headers = serializers.DictField(required=False)
      url = serializers.CharField(allow_blank=True, required=False)
      method = serializers.CharField(allow_blank=True, required=False)
      query_string = serializers.CharField(
          allow_null=True, allow_blank=True, required=False
      )
  class BreadcrumbsSerializer(BaseBreadcrumbsSerializer):
      """Breadcrumb serializer with an optional, generically typed timestamp."""

      timestamp = GenericField(required=False)

      def validate_level(self, value):
          """Translate the level "log" to "info"; pass any other level through."""
          return "info" if value == "log" else value
  class BaseSerializer(serializers.Serializer):
      """Common base for event serializers; provides user/IP extraction."""

      def process_user(self, project, data):
          """
          Fetch user data from SDK event and request

          Starts from ``data["user"]`` (an empty dict when absent). When the
          serializer context carries a request and its client IP is routable,
          the IP is stored under ``user["ip_address"]``, anonymized first if
          ``project.should_scrub_ip_addresses`` is set. The user dict taken
          from ``data`` is modified in place.

          Returns the user dict, or None when there is no request in the
          context or when there is neither user data nor a routable IP.
          """
          # NOTE(review): the nesting was reconstructed from a listing that had
          # lost its indentation; ``return user`` is assumed to sit under the
          # ``user or is_routable`` check. Confirm against the repository.
          user = data.get("user", {})
          if not (self.context and self.context.get("request")):
              return None
          client_ip, is_routable = get_client_ip(self.context["request"])
          if not (user or is_routable):
              return None
          if is_routable:
              if project.should_scrub_ip_addresses:
                  client_ip = anonymize_ip(client_ip)
              user["ip_address"] = client_ip
          return user
  class SentrySDKEventSerializer(BaseSerializer):
      """
      Represents events coming from a OSS sentry SDK client

      Only ``event_id`` is required; every other field is optional.
      """

      breadcrumbs = serializers.JSONField(required=False)
      tags = ForgivingHStoreField(required=False)
      event_id = serializers.UUIDField()
      extra = serializers.JSONField(required=False)
      request = RequestSerializer(required=False)
      server_name = serializers.CharField(required=False)
      sdk = serializers.JSONField(required=False)
      platform = serializers.CharField(required=False)
      release = serializers.CharField(allow_null=True, required=False)
      # The pattern matches strings free of newline, carriage return, form feed
      # and "/" characters; presumably other values are dropped rather than
      # rejected (see ForgivingDisallowRegexField) - TODO confirm
      environment = ForgivingDisallowRegexField(
          disallow_regex=r"^[^\n\r\f\/]*$",
          allow_null=True,
          required=False,
      )
      _meta = serializers.JSONField(required=False)
  class FormattedMessageSerializer(serializers.Serializer):
      """
      Validates a structured message object and collapses it to its
      ``formatted`` text.
      """

      # Documented as required, but some Sentry SDKs don't send it
      formatted = serializers.CharField(required=False)
      messages = serializers.CharField(required=False)
      params = serializers.ListField(required=False, child=serializers.CharField())

      def to_internal_value(self, data):
          """Return the validated ``formatted`` string, or "" when it is absent."""
          validated = super().to_internal_value(data)
          return validated.get("formatted", "")
  class MessageField(serializers.CharField):
      """Char field that also accepts a structured (dict) message."""

      def to_internal_value(self, data):
          """
          Validate ``data`` as a plain string, or, when it is a dict, through
          FormattedMessageSerializer (raising on invalid input) and return
          that serializer's validated data.
          """
          if not isinstance(data, dict):
              return super().to_internal_value(data)
          formatted_message = FormattedMessageSerializer(data=data)
          formatted_message.is_valid(raise_exception=True)
          return formatted_message.validated_data
  class StoreDefaultSerializer(SentrySDKEventSerializer):
      """
      Default serializer. Used as both a base class and for default error types
      """

      type = EventType.DEFAULT
      contexts = serializers.JSONField(required=False)
      level = serializers.CharField(required=False)
      logentry = serializers.JSONField(required=False)
      message = MessageField(required=False, allow_blank=True)
      timestamp = FlexibleDateTimeField(required=False)
      # Shares its name with the django.db ``transaction`` module imported by
      # this file. Method bodies do not see class-level names, so ``create``
      # still resolves ``transaction`` to the module.
      transaction = serializers.CharField(
          required=False, allow_null=True, allow_blank=True
      )
      user = serializers.JSONField(required=False)
      modules = serializers.JSONField(required=False)

      def validate_breadcrumbs(self, value):
          """
          Normalize breadcrumbs, which may come in as dict or list

          A list is wrapped as ``{"values": [...]}``. An empty ``values`` list
          yields None. When the individual breadcrumbs validate, their
          validated form is returned; otherwise the input is returned as
          received. A payload that is neither a list nor a dict is returned
          unchanged instead of failing on ``.get``.
          """
          if isinstance(value, list):
              value = {"values": value}
          if not isinstance(value, dict):
              return value
          if value.get("values") == []:
              return None
          serializer = BreadcrumbsSerializer(data=value.get("values"), many=True)
          if serializer.is_valid():
              return {"values": serializer.validated_data}
          return value

      def get_eventtype(self):
          """
          Get event type class from self.type

          Returns None for any type other than DEFAULT and ERROR.
          """
          if self.type is EventType.DEFAULT:
              return DefaultEvent()
          if self.type is EventType.ERROR:
              return ErrorEvent()

      def modify_exception(self, exception):
          """
          OSS Sentry does this, I have no idea why

          Removes ``module`` from every exception value and, when every frame
          of a stacktrace is flagged ``in_app``, flips all of them to False.
          The exception is modified in place and returned.
          """
          if exception:
              for value in exception.get("values", []):
                  value.pop("module", None)
                  if value.get("stacktrace") and value["stacktrace"].get("frames"):
                      frames = value["stacktrace"]["frames"]
                      # If in_app is always true, make it false ¯\_(ツ)_/¯
                      if all(x.get("in_app") for x in frames):
                          for frame in frames:
                              frame["in_app"] = False
          return exception

      def generate_tags(
          self, data: Dict, tags: Union[List[Tuple[str, str]], None] = None
      ):
          """
          Determine tag relational data
          Optionally pass tags array for existing known tags to generate

          A passed list is extended in place and returned. When no list is
          given a fresh one is created per call, so tags can no longer leak
          between calls through a shared default list.
          """
          if tags is None:
              tags = []
          for Processor in TAG_PROCESSORS:
              processor = Processor()
              value = processor.get_tag_values(data)
              if value:
                  tags.append((processor.tag, value))
          if data.get("tags"):
              tags.extend(data["tags"].items())
          return tags

      def annotate_contexts(self, event):
          """
          SDK events may contain contexts. This function adds additional contexts data

          A processor only contributes when the event has no truthy context
          under the processor's name. Returns the contexts mapping, or None
          when the event had none and no processor produced one.
          """
          contexts = event.get("contexts")
          for Processor in EVENT_CONTEXT_PROCESSORS:
              processor = Processor()
              if contexts is None or not contexts.get(processor.name):
                  processor_contexts = processor.get_context(event)
                  if processor_contexts:
                      if contexts is None:
                          contexts = {}
                      contexts[processor.name] = processor_contexts
          return contexts

      def get_message(self, data):
          """ Prefer message over logentry """
          if "message" in data:
              return data["message"]
          return data.get("logentry", {}).get("message", "")

      def get_environment(self, name: str, project):
          """
          Get or create the organization's environment with this name and
          attach the project to it. The name is cut to the model field's
          max_length.
          """
          environment, _ = Environment.objects.get_or_create(
              name=name[: Environment._meta.get_field("name").max_length],
              organization=project.organization,
          )
          environment.projects.add(project)
          return environment

      def get_release(self, version: str, project):
          """
          Get or create the organization's release with this version and
          attach the project to it.
          """
          release, _ = Release.objects.get_or_create(
              version=version, organization=project.organization
          )
          release.projects.add(project)
          return release

      def create(self, data):
          """
          Store the validated event and return the created Event.

          Gets or creates the matching Issue (keyed on title, culprit, project
          and type), resolves the environment and release, collects tags and
          saves the event payload. ``data["contexts"]`` and the headers of
          ``data["request"]`` are rewritten in place along the way.

          Raises PermissionDenied when the insert fails with an IntegrityError
          that mentions ``event_id``; any other IntegrityError is re-raised.
          """
          # NOTE(review): block nesting was reconstructed from a listing that
          # had lost its indentation; in particular the extent of the atomic
          # block should be confirmed against the repository.
          project = self.context.get("project")
          eventtype = self.get_eventtype()
          metadata = eventtype.get_metadata(data)
          title = eventtype.get_title(metadata)
          culprit = eventtype.get_location(data)
          request = data.get("request")
          breadcrumbs = data.get("breadcrumbs")
          level = None
          if data.get("level"):
              level = LogLevel.from_string(data["level"])
          exception = self.modify_exception(data.get("exception"))
          if request:
              headers = request.get("headers")
              if headers:
                  request["inferred_content_type"] = headers.get("Content-Type")
                  # Headers are stored as a name-sorted list of pairs; a
                  # list-valued header is reduced to its first entry
                  sorted_headers = sorted(headers.items())
                  for idx, header in enumerate(sorted_headers):
                      if isinstance(header[1], list):
                          sorted_headers[idx] = (header[0], header[1][0])
                  request["headers"] = sorted_headers
          contexts = self.annotate_contexts(data)
          data["contexts"] = contexts

          with transaction.atomic():
              if not project.first_event:
                  project.first_event = data.get("timestamp")
                  project.save(update_fields=["first_event"])
              # Defaults are only applied when the issue gets created
              defaults = {"metadata": sanitize_bad_postgres_json(metadata)}
              if level:
                  defaults["level"] = level
              issue, _ = Issue.objects.get_or_create(
                  title=sanitize_bad_postgres_chars(title),
                  culprit=sanitize_bad_postgres_chars(culprit),
                  project_id=project.id,
                  type=self.type,
                  defaults=defaults,
              )

              environment = None
              if data.get("environment"):
                  environment = self.get_environment(data["environment"], project)
              release = None
              if data.get("release"):
                  release = self.get_release(data["release"], project)

              tags = []
              if environment:
                  tags.append(("environment", environment.name))
              if release:
                  tags.append(("release", release.version))
              tags = self.generate_tags(data, tags)
              # Later pairs win when the same tag key appears more than once
              tags = {tag[0]: tag[1] for tag in tags}

              json_data = {
                  "breadcrumbs": breadcrumbs,
                  "contexts": contexts,
                  "culprit": culprit,
                  "exception": exception,
                  "metadata": metadata,
                  "message": self.get_message(data),
                  "modules": data.get("modules"),
                  "platform": data.get("platform", "other"),
                  "request": request,
                  "sdk": data.get("sdk"),
                  "title": title,
                  "type": self.type.label,
              }
              if environment:
                  json_data["environment"] = environment.name
              extra = data.get("extra")
              if extra:
                  json_data["extra"] = extra
              user = self.process_user(project, data)
              if user:
                  json_data["user"] = user

              # Field errors handed over through the serializer context are
              # stored alongside the event
              errors = None
              handled_errors = self.context.get("handled_errors")
              if handled_errors:
                  errors = []
                  for field_name, field_errors in handled_errors.items():
                      for error in field_errors:
                          errors.append(
                              {
                                  "reason": str(error),
                                  "type": error.code,
                                  "name": field_name,
                                  "value": error.value,
                              }
                          )

              params = {
                  "event_id": data["event_id"],
                  "issue": issue,
                  "tags": tags,
                  "errors": errors,
                  "timestamp": data.get("timestamp"),
                  "data": sanitize_bad_postgres_json(json_data),
                  "release": release,
              }
              if level:
                  params["level"] = level
              try:
                  event = Event.objects.create(**params)
              except IntegrityError as e:
                  # This except is more efficient than a query for exists().
                  if e.args and "event_id" in e.args[0]:
                      raise PermissionDenied(
                          "An event with the same ID already exists (%s)"
                          % params["event_id"]
                      ) from e
                  raise
          issue.check_for_status_update()
          return event
  class StoreErrorSerializer(StoreDefaultSerializer):
      """
      Serializer for error events.

      Primary difference is the presence of the exception attribute.
      """

      type = EventType.ERROR
      exception = serializers.JSONField(required=False)
  class StoreCSPReportSerializer(BaseSerializer):
      """
      CSP Report Serializer
      Very different format from others Store serializers.
      Does not extend base class due to differences.
      """

      type = EventType.CSP

      def __init__(self, *args, **kwargs):
          super().__init__(*args, **kwargs)
          # "csp-report" contains a hyphen, so it cannot be declared as a
          # class attribute and is registered here instead
          self.fields["csp-report"] = serializers.JSONField()

      def create(self, data):
          """
          Store a CSP violation report and return the created Event.

          The issue is looked up, or created, by title, culprit, project and
          the CSP event type.
          """
          project = self.context.get("project")
          report = data["csp-report"]
          title = self.get_title(report)
          culprit = self.get_culprit(report)
          metadata = {
              "message": title,
              "uri": self.get_uri(report),
              "directive": self.get_effective_directive(report),
          }
          issue, _ = Issue.objects.get_or_create(
              title=title,
              culprit=culprit,
              project_id=project.id,
              type=EventType.CSP,
              defaults={"metadata": metadata},
          )

          # Convert - to _
          normalized_csp = {
              key.replace("-", "_"): value for key, value in report.items()
          }
          normalized_csp.setdefault("effective_directive", metadata["directive"])

          json_data = {
              "culprit": culprit,
              "csp": normalized_csp,
              "title": title,
              "metadata": metadata,
              "message": title,
              "type": EventType.CSP.label,
          }
          user = self.process_user(project, data)
          if user:
              json_data["user"] = user

          return Event.objects.create(issue=issue, data=json_data)

      def get_effective_directive(self, data):
          """
          Some browsers return effective-directive and others don't.
          Infer missing ones from violated directive
          """
          if "effective-directive" not in data:
              # First whitespace-separated token of the violated directive
              return data["violated-directive"].split()[0]
          return data["effective-directive"]

      def get_uri(self, data):
          """Return the network location part of the report's blocked-uri."""
          return urlparse(data["blocked-uri"]).netloc

      def get_title(self, data):
          """Build the title from the directive (minus "-src") and the blocked host."""
          humanized_directive = self.get_effective_directive(data).replace("-src", "")
          uri = self.get_uri(data)
          return f"Blocked '{humanized_directive}' from '{uri}'"

      def get_culprit(self, data):
          """Return the violated directive, e.g. "style-src cdn.example.com"."""
          return data.get("violated-directive")
  class EnvelopeHeaderSerializer(serializers.Serializer):
      """Validates an envelope header; both fields are optional."""

      event_id = serializers.UUIDField(required=False)
      sent_at = FlexibleDateTimeField(required=False)