serializers.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503
  1. import uuid
  2. from typing import Dict, List, Tuple, Union
  3. from urllib.parse import urlparse
  4. from django.db import transaction
  5. from django.db.utils import IntegrityError
  6. from ipware import get_client_ip
  7. from anonymizeip import anonymize_ip
  8. from rest_framework import serializers
  9. from rest_framework.exceptions import PermissionDenied
  10. from sentry.eventtypes.error import ErrorEvent
  11. from sentry.eventtypes.base import DefaultEvent
  12. from issues.models import EventType, Issue
  13. from issues.serializers import BaseBreadcrumbsSerializer
  14. from issues.tasks import update_search_index_issue
  15. from environments.models import Environment
  16. from releases.models import Release
  17. from glitchtip.serializers import FlexibleDateTimeField
  18. from .models import Event, LogLevel
  19. from .fields import (
  20. GenericField,
  21. ForgivingHStoreField,
  22. ForgivingDisallowRegexField,
  23. QueryStringField,
  24. )
  25. from .event_tag_processors import TAG_PROCESSORS
  26. from .event_context_processors import EVENT_CONTEXT_PROCESSORS
  27. from .event_processors import EVENT_PROCESSORS
  28. def replace(data: Union[str, dict, list], match: str, repl: str):
  29. """A recursive replace function"""
  30. if isinstance(data, dict):
  31. return {k: replace(v, match, repl) for k, v in data.items()}
  32. elif isinstance(data, list):
  33. return [replace(i, match, repl) for i in data]
  34. elif isinstance(data, str):
  35. return data.replace(match, repl)
  36. return data
  37. def sanitize_bad_postgres_chars(data: Union[str, dict, list]):
  38. """
  39. Remove values which are not supported by the postgres string data types
  40. """
  41. known_bads = ["\x00"]
  42. for known_bad in known_bads:
  43. data = data.replace(known_bad, " ")
  44. return data
  45. def sanitize_bad_postgres_json(data: Union[str, dict, list]):
  46. """
  47. Remove values which are not supported by the postgres JSONB data type
  48. """
  49. known_bads = ["\u0000"]
  50. for known_bad in known_bads:
  51. data = replace(data, known_bad, " ")
  52. return data
  53. class RequestSerializer(serializers.Serializer):
  54. env = serializers.DictField(
  55. child=serializers.CharField(allow_blank=True, allow_null=True), required=False
  56. )
  57. # Dict values can be both str and List[str]
  58. headers = serializers.DictField(required=False)
  59. url = serializers.CharField(required=False, allow_blank=True)
  60. method = serializers.CharField(required=False, allow_blank=True)
  61. query_string = QueryStringField(required=False, allow_null=True)
  62. class BreadcrumbsSerializer(BaseBreadcrumbsSerializer):
  63. timestamp = GenericField(required=False)
  64. def validate_level(self, value):
  65. if value == "log":
  66. return "info"
  67. return value
  68. class BaseSerializer(serializers.Serializer):
  69. def process_user(self, project, data):
  70. """Fetch user data from SDK event and request"""
  71. user = data.get("user", {})
  72. if self.context and self.context.get("request"):
  73. client_ip, is_routable = get_client_ip(self.context["request"])
  74. if user or is_routable:
  75. if is_routable:
  76. if project.should_scrub_ip_addresses:
  77. client_ip = anonymize_ip(client_ip)
  78. user["ip_address"] = client_ip
  79. return user
  80. class SentrySDKEventSerializer(BaseSerializer):
  81. """Represents events coming from a OSS sentry SDK client"""
  82. breadcrumbs = serializers.JSONField(required=False)
  83. tags = ForgivingHStoreField(required=False)
  84. event_id = serializers.UUIDField(required=False, default=uuid.uuid4)
  85. extra = serializers.JSONField(required=False)
  86. request = RequestSerializer(required=False)
  87. server_name = serializers.CharField(required=False)
  88. sdk = serializers.JSONField(required=False)
  89. platform = serializers.CharField(required=False)
  90. release = serializers.CharField(required=False, allow_null=True)
  91. environment = ForgivingDisallowRegexField(
  92. required=False, allow_null=True, disallow_regex=r"^[^\n\r\f\/]*$"
  93. )
  94. _meta = serializers.JSONField(required=False)
  95. class FormattedMessageSerializer(serializers.Serializer):
  96. formatted = serializers.CharField(
  97. required=False
  98. ) # Documented as required, but some Sentry SDKs don't send it
  99. messages = serializers.CharField(required=False)
  100. params = serializers.ListField(child=serializers.CharField(), required=False)
  101. def to_internal_value(self, data):
  102. value = super().to_internal_value(data)
  103. return value.get("formatted", "")
  104. class MessageField(serializers.CharField):
  105. def to_internal_value(self, data):
  106. if isinstance(data, dict):
  107. serializer = FormattedMessageSerializer(data=data)
  108. serializer.is_valid(raise_exception=True)
  109. return serializer.validated_data
  110. return super().to_internal_value(data)
  111. class LogEntrySerializer(serializers.Serializer):
  112. formatted = serializers.CharField(required=False)
  113. message = serializers.CharField(required=False)
  114. params = serializers.JSONField(required=False)
  115. def validate(self, attrs):
  116. data = super().validate(attrs)
  117. if not data.get("formatted") and data.get("params"):
  118. params = data["params"]
  119. if isinstance(params, list):
  120. data["formatted"] = data["message"] % tuple(data["params"])
  121. return data
  122. class StoreDefaultSerializer(SentrySDKEventSerializer):
  123. """
  124. Default serializer. Used as both a base class and for default error types
  125. """
  126. type = EventType.DEFAULT
  127. contexts = serializers.JSONField(required=False)
  128. level = serializers.CharField(required=False)
  129. logentry = LogEntrySerializer(required=False)
  130. message = MessageField(required=False, allow_blank=True, allow_null=True)
  131. timestamp = FlexibleDateTimeField(required=False)
  132. transaction = serializers.CharField(
  133. required=False, allow_null=True, allow_blank=True
  134. )
  135. user = serializers.JSONField(required=False)
  136. modules = serializers.JSONField(required=False)
  137. def validate_breadcrumbs(self, value):
  138. """
  139. Normalize breadcrumbs, which may come in as dict or list
  140. """
  141. if isinstance(value, list):
  142. value = {"values": value}
  143. if value.get("values") == []:
  144. return None
  145. serializer = BreadcrumbsSerializer(data=value.get("values"), many=True)
  146. if serializer.is_valid():
  147. return {"values": serializer.validated_data}
  148. return value
  149. def get_eventtype(self):
  150. """Get event type class from self.type"""
  151. if self.type is EventType.DEFAULT:
  152. return DefaultEvent()
  153. if self.type is EventType.ERROR:
  154. return ErrorEvent()
  155. def modify_exception(self, exception):
  156. """OSS Sentry does this, I have no idea why"""
  157. if exception:
  158. for value in exception.get("values", []):
  159. value.pop("module", None)
  160. if value.get("stacktrace") and value["stacktrace"].get("frames"):
  161. frames = value["stacktrace"]["frames"]
  162. # If in_app is always true, make it false ¯\_(ツ)_/¯
  163. if all(x.get("in_app") for x in frames):
  164. for frame in frames:
  165. frame["in_app"] = False
  166. return exception
  167. def generate_tags(self, data: Dict, tags: List[Tuple[str, str]] = []):
  168. """
  169. Determine tag relational data
  170. Optionally pass tags array for existing known tags to generate
  171. """
  172. for Processor in TAG_PROCESSORS:
  173. processor = Processor()
  174. value = processor.get_tag_values(data)
  175. if value:
  176. tags.append((processor.tag, value))
  177. if data.get("tags"):
  178. tags += [(k, v) for k, v in data["tags"].items()]
  179. return tags
  180. def annotate_contexts(self, event):
  181. """
  182. SDK events may contain contexts. This function adds additional contexts data
  183. """
  184. contexts = event.get("contexts")
  185. for Processor in EVENT_CONTEXT_PROCESSORS:
  186. processor = Processor()
  187. if contexts is None or not contexts.get(processor.name):
  188. processor_contexts = processor.get_context(event)
  189. if processor_contexts:
  190. if contexts is None:
  191. contexts = {}
  192. contexts[processor.name] = processor_contexts
  193. return contexts
  194. def get_message(self, data):
  195. """Prefer message over logentry"""
  196. if "message" in data:
  197. return data["message"]
  198. return data.get("logentry", {}).get("message", "")
  199. def get_environment(self, name: str, project):
  200. environment, _ = Environment.objects.get_or_create(
  201. name=name[: Environment._meta.get_field("name").max_length],
  202. organization=project.organization,
  203. )
  204. environment.projects.add(project)
  205. return environment
  206. def get_release(self, version: str, project):
  207. release, _ = Release.objects.get_or_create(
  208. version=version, organization=project.organization
  209. )
  210. release.projects.add(project)
  211. return release
  212. def is_url(self, filename: str) -> bool:
  213. return filename.startswith(("file:", "http:", "https:", "applewebdata:"))
  214. def normalize_stacktrace(self, stacktrace):
  215. """
  216. Port of semaphore store/normalize/stacktrace.rs
  217. """
  218. if not stacktrace:
  219. return
  220. for frame in stacktrace.get("frames", []):
  221. if not frame.get("abs_path") and frame.get("filename"):
  222. frame["abs_path"] = frame["filename"]
  223. if frame.get("filename") and self.is_url(frame["filename"]):
  224. frame["filename"] = urlparse(frame["filename"]).path
  225. def create(self, validated_data):
  226. data = validated_data
  227. project = self.context.get("project")
  228. eventtype = self.get_eventtype()
  229. metadata = eventtype.get_metadata(data)
  230. exception = data.get("exception")
  231. if (
  232. data.get("stacktrace")
  233. and exception
  234. and len(exception.get("values", 0)) > 0
  235. and not exception["values"][0].get("stacktrace")
  236. ):
  237. # stacktrace is deprecated, but supported at this time
  238. # Assume it's for the first exception value
  239. exception["values"][0]["stacktrace"] = data.get("stacktrace")
  240. exception = self.modify_exception(exception)
  241. if isinstance(exception, dict):
  242. for value in exception.get("values", []):
  243. self.normalize_stacktrace(value.get("stacktrace"))
  244. release = None
  245. if data.get("release"):
  246. release = self.get_release(data["release"], project)
  247. for Processor in EVENT_PROCESSORS:
  248. Processor(project, release, data).run()
  249. title = eventtype.get_title(metadata)
  250. culprit = eventtype.get_location(data)
  251. request = data.get("request")
  252. breadcrumbs = data.get("breadcrumbs")
  253. level = None
  254. if data.get("level"):
  255. level = LogLevel.from_string(data["level"])
  256. if request:
  257. headers = request.get("headers")
  258. if headers:
  259. request["inferred_content_type"] = headers.get("Content-Type")
  260. sorted_headers = sorted([pair for pair in headers.items()])
  261. for idx, header in enumerate(sorted_headers):
  262. if isinstance(header[1], list):
  263. sorted_headers[idx] = (header[0], header[1][0])
  264. request["headers"] = sorted_headers
  265. contexts = self.annotate_contexts(data)
  266. data["contexts"] = contexts
  267. with transaction.atomic():
  268. if not project.first_event:
  269. project.first_event = data.get("timestamp")
  270. project.save(update_fields=["first_event"])
  271. defaults = {
  272. "metadata": sanitize_bad_postgres_json(metadata),
  273. }
  274. if level:
  275. defaults["level"] = level
  276. environment = None
  277. if data.get("environment"):
  278. environment = self.get_environment(data["environment"], project)
  279. tags = []
  280. if environment:
  281. tags.append(("environment", environment.name))
  282. if release:
  283. tags.append(("release", release.version))
  284. tags = self.generate_tags(data, tags)
  285. defaults["tags"] = {tag[0]: [tag[1]] for tag in tags}
  286. issue, issue_created = Issue.objects.get_or_create(
  287. title=sanitize_bad_postgres_chars(title),
  288. culprit=sanitize_bad_postgres_chars(culprit),
  289. project_id=project.id,
  290. type=self.type,
  291. defaults=defaults,
  292. )
  293. json_data = {
  294. "breadcrumbs": breadcrumbs,
  295. "contexts": contexts,
  296. "culprit": culprit,
  297. "exception": exception,
  298. "metadata": metadata,
  299. "message": self.get_message(data),
  300. "modules": data.get("modules"),
  301. "platform": data.get("platform", "other"),
  302. "request": request,
  303. "sdk": data.get("sdk"),
  304. "title": title,
  305. "type": self.type.label,
  306. }
  307. if environment:
  308. json_data["environment"] = environment.name
  309. if data.get("logentry"):
  310. json_data["logentry"] = data.get("logentry")
  311. extra = data.get("extra")
  312. if extra:
  313. json_data["extra"] = extra
  314. user = self.process_user(project, data)
  315. if user:
  316. json_data["user"] = user
  317. errors = None
  318. handled_errors = self.context.get("handled_errors")
  319. if handled_errors:
  320. errors = []
  321. for field_name, field_errors in handled_errors.items():
  322. for error in field_errors:
  323. errors.append(
  324. {
  325. "reason": str(error),
  326. "type": error.code,
  327. "name": field_name,
  328. "value": error.value,
  329. }
  330. )
  331. params = {
  332. "event_id": data["event_id"],
  333. "issue": issue,
  334. "tags": {tag[0]: tag[1] for tag in tags},
  335. "errors": errors,
  336. "timestamp": data.get("timestamp"),
  337. "data": sanitize_bad_postgres_json(json_data),
  338. "release": release,
  339. }
  340. if level:
  341. params["level"] = level
  342. try:
  343. event = Event.objects.create(**params)
  344. except IntegrityError as e:
  345. # This except is more efficient than a query for exists().
  346. if e.args and "event_id" in e.args[0]:
  347. raise PermissionDenied(
  348. "An event with the same ID already exists (%s)"
  349. % params["event_id"]
  350. ) from e
  351. raise e
  352. issue.check_for_status_update()
  353. update_search_index_issue(args=[issue.pk, issue_created], countdown=10)
  354. return event
  355. class StoreErrorSerializer(StoreDefaultSerializer):
  356. """Primary difference is the presense of exception attribute"""
  357. type = EventType.ERROR
  358. exception = serializers.JSONField(required=False)
  359. stacktrace = serializers.JSONField(
  360. required=False, help_text="Deprecated but supported at this time"
  361. )
  362. class StoreCSPReportSerializer(BaseSerializer):
  363. """
  364. CSP Report Serializer
  365. Very different format from others Store serializers.
  366. Does not extend base class due to differences.
  367. """
  368. type = EventType.CSP
  369. def __init__(self, *args, **kwargs):
  370. super().__init__(*args, **kwargs)
  371. # This is done to support the hyphen
  372. self.fields.update({"csp-report": serializers.JSONField()})
  373. def create(self, data):
  374. project = self.context.get("project")
  375. csp = data["csp-report"]
  376. title = self.get_title(csp)
  377. culprit = self.get_culprit(csp)
  378. uri = self.get_uri(csp)
  379. directive = self.get_effective_directive(csp)
  380. metadata = {
  381. "message": title,
  382. "uri": uri,
  383. "directive": directive,
  384. }
  385. issue, _ = Issue.objects.get_or_create(
  386. title=title,
  387. culprit=culprit,
  388. project_id=project.id,
  389. type=EventType.CSP,
  390. defaults={"metadata": metadata},
  391. )
  392. # Convert - to _
  393. normalized_csp = dict((k.replace("-", "_"), v) for k, v in csp.items())
  394. if "effective_directive" not in normalized_csp:
  395. normalized_csp["effective_directive"] = directive
  396. json_data = {
  397. "culprit": culprit,
  398. "csp": normalized_csp,
  399. "title": title,
  400. "metadata": metadata,
  401. "message": title,
  402. "type": EventType.CSP.label,
  403. }
  404. user = self.process_user(project, data)
  405. if user:
  406. json_data["user"] = user
  407. params = {
  408. "issue": issue,
  409. "data": json_data,
  410. }
  411. return Event.objects.create(**params)
  412. def get_effective_directive(self, data):
  413. """
  414. Some browers return effective-directive and others don't.
  415. Infer missing ones from violated directive
  416. """
  417. if "effective-directive" in data:
  418. return data["effective-directive"]
  419. first_violation = data["violated-directive"].split()[0]
  420. return first_violation
  421. def get_uri(self, data):
  422. url = data["blocked-uri"]
  423. return urlparse(url).netloc
  424. def get_title(self, data):
  425. effective_directive = self.get_effective_directive(data)
  426. humanized_directive = effective_directive.replace("-src", "")
  427. uri = self.get_uri(data)
  428. return f"Blocked '{humanized_directive}' from '{uri}'"
  429. def get_culprit(self, data):
  430. # "style-src cdn.example.com"
  431. return data.get("violated-directive")
  432. class EnvelopeHeaderSerializer(serializers.Serializer):
  433. event_id = serializers.UUIDField(required=False)
  434. sent_at = FlexibleDateTimeField(required=False)