# serializers.py
  1. import uuid
  2. from typing import Dict, List, Tuple, Union
  3. from urllib.parse import urlparse
  4. from anonymizeip import anonymize_ip
  5. from django.db import transaction
  6. from django.db.utils import IntegrityError
  7. from ipware import get_client_ip
  8. from rest_framework import serializers
  9. from rest_framework.exceptions import PermissionDenied
  10. from environments.models import Environment
  11. from glitchtip.serializers import FlexibleDateTimeField
  12. from issues.models import EventType, Issue
  13. from issues.serializers import BaseBreadcrumbsSerializer
  14. from issues.tasks import update_search_index_issue
  15. from releases.models import Release
  16. from sentry.eventtypes.base import DefaultEvent
  17. from sentry.eventtypes.error import ErrorEvent
  18. from .event_context_processors import EVENT_CONTEXT_PROCESSORS
  19. from .event_processors import EVENT_PROCESSORS
  20. from .event_tag_processors import TAG_PROCESSORS
  21. from .fields import (
  22. ForgivingDisallowRegexField,
  23. ForgivingHStoreField,
  24. GenericField,
  25. QueryStringField,
  26. )
  27. from .models import Event, LogLevel
  28. def replace(data: Union[str, dict, list], match: str, repl: str):
  29. """A recursive replace function"""
  30. if isinstance(data, dict):
  31. return {k: replace(v, match, repl) for k, v in data.items()}
  32. elif isinstance(data, list):
  33. return [replace(i, match, repl) for i in data]
  34. elif isinstance(data, str):
  35. return data.replace(match, repl)
  36. return data
  37. def sanitize_bad_postgres_chars(data: Union[str, dict, list]):
  38. """
  39. Remove values which are not supported by the postgres string data types
  40. """
  41. known_bads = ["\x00"]
  42. for known_bad in known_bads:
  43. data = data.replace(known_bad, " ")
  44. return data
  45. def sanitize_bad_postgres_json(data: Union[str, dict, list]):
  46. """
  47. Remove values which are not supported by the postgres JSONB data type
  48. """
  49. known_bads = ["\u0000"]
  50. for known_bad in known_bads:
  51. data = replace(data, known_bad, " ")
  52. return data
  53. class RequestSerializer(serializers.Serializer):
  54. env = serializers.DictField(
  55. child=serializers.CharField(allow_blank=True, allow_null=True), required=False
  56. )
  57. # Dict values can be both str and List[str]
  58. headers = serializers.DictField(required=False)
  59. url = serializers.CharField(required=False, allow_blank=True)
  60. method = serializers.CharField(required=False, allow_blank=True)
  61. query_string = QueryStringField(required=False, allow_null=True)
  62. class BreadcrumbsSerializer(BaseBreadcrumbsSerializer):
  63. timestamp = GenericField(required=False)
  64. def validate_level(self, value):
  65. if value == "log":
  66. return "info"
  67. return value
  68. class BaseSerializer(serializers.Serializer):
  69. def process_user(self, project, data):
  70. """Fetch user data from SDK event and request"""
  71. user = data.get("user", {})
  72. if self.context and self.context.get("request"):
  73. client_ip, is_routable = get_client_ip(self.context["request"])
  74. if user or is_routable:
  75. if is_routable:
  76. if project.should_scrub_ip_addresses:
  77. client_ip = anonymize_ip(client_ip)
  78. user["ip_address"] = client_ip
  79. return user
  80. class SentrySDKEventSerializer(BaseSerializer):
  81. """Represents events coming from a OSS sentry SDK client"""
  82. breadcrumbs = serializers.JSONField(required=False)
  83. tags = ForgivingHStoreField(required=False)
  84. event_id = serializers.UUIDField(required=False, default=uuid.uuid4)
  85. extra = serializers.JSONField(required=False)
  86. request = RequestSerializer(required=False)
  87. server_name = serializers.CharField(required=False)
  88. sdk = serializers.JSONField(required=False)
  89. platform = serializers.CharField(required=False)
  90. release = serializers.CharField(required=False, allow_null=True, allow_blank=True)
  91. environment = ForgivingDisallowRegexField(
  92. required=False, allow_null=True, disallow_regex=r"^[^\n\r\f\/]*$"
  93. )
  94. _meta = serializers.JSONField(required=False)
  95. def get_environment(self, name: str, project):
  96. environment, _ = Environment.objects.get_or_create(
  97. name=name[: Environment._meta.get_field("name").max_length],
  98. organization=project.organization,
  99. )
  100. environment.projects.add(project)
  101. return environment
  102. def get_release(self, version: str, project):
  103. release, _ = Release.objects.get_or_create(
  104. version=version, organization=project.organization
  105. )
  106. release.projects.add(project)
  107. return release
  108. class FormattedMessageSerializer(serializers.Serializer):
  109. formatted = serializers.CharField(
  110. required=False
  111. ) # Documented as required, but some Sentry SDKs don't send it
  112. message = serializers.CharField(required=False)
  113. params = serializers.JSONField(required=False)
  114. def validate(self, attrs):
  115. data = super().validate(attrs)
  116. if not data.get("formatted") and data.get("params"):
  117. params = data["params"]
  118. if isinstance(params, list):
  119. data["formatted"] = data["message"] % tuple(params)
  120. elif isinstance(params, dict):
  121. data["formatted"] = data["message"].format(**params)
  122. return data
  123. # OSS Sentry only keeps unformatted "message" when it creates a formatted message
  124. return {key: data[key] for key in data if key != "message"}
  125. class MessageField(serializers.CharField):
  126. def to_internal_value(self, data):
  127. if isinstance(data, dict):
  128. serializer = FormattedMessageSerializer(data=data)
  129. serializer.is_valid(raise_exception=True)
  130. return serializer.validated_data
  131. return super().to_internal_value(data)
  132. class LogEntrySerializer(serializers.Serializer):
  133. formatted = serializers.CharField(required=False)
  134. message = serializers.CharField(required=False)
  135. params = serializers.JSONField(required=False)
  136. def validate(self, attrs):
  137. data = super().validate(attrs)
  138. if not data.get("formatted") and data.get("params"):
  139. params = data["params"]
  140. if isinstance(params, list):
  141. data["formatted"] = data["message"] % tuple(data["params"])
  142. elif isinstance(params, dict):
  143. data["formatted"] = data["message"].format(**params)
  144. return data
  145. class StoreDefaultSerializer(SentrySDKEventSerializer):
  146. """
  147. Default serializer. Used as both a base class and for default error types
  148. """
  149. type = EventType.DEFAULT
  150. contexts = serializers.JSONField(required=False)
  151. level = serializers.CharField(required=False)
  152. logentry = LogEntrySerializer(required=False)
  153. message = MessageField(required=False, allow_blank=True, allow_null=True)
  154. timestamp = FlexibleDateTimeField(required=False)
  155. transaction = serializers.CharField(
  156. required=False, allow_null=True, allow_blank=True
  157. )
  158. user = serializers.JSONField(required=False)
  159. modules = serializers.JSONField(required=False)
  160. def validate_breadcrumbs(self, value):
  161. """
  162. Normalize breadcrumbs, which may come in as dict or list
  163. """
  164. if isinstance(value, list):
  165. value = {"values": value}
  166. if value.get("values") == []:
  167. return None
  168. serializer = BreadcrumbsSerializer(data=value.get("values"), many=True)
  169. if serializer.is_valid():
  170. return {"values": serializer.validated_data}
  171. return value
  172. def get_eventtype(self):
  173. """Get event type class from self.type"""
  174. if self.type is EventType.DEFAULT:
  175. return DefaultEvent()
  176. if self.type is EventType.ERROR:
  177. return ErrorEvent()
  178. def modify_exception(self, exception):
  179. """OSS Sentry does this, I have no idea why"""
  180. if exception:
  181. for value in exception.get("values", []):
  182. value.pop("module", None)
  183. if value.get("stacktrace") and value["stacktrace"].get("frames"):
  184. frames = value["stacktrace"]["frames"]
  185. # If in_app is always true, make it false ¯\_(ツ)_/¯
  186. if all(x.get("in_app") for x in frames):
  187. for frame in frames:
  188. frame["in_app"] = False
  189. return exception
  190. def generate_tags(self, data: Dict, tags: List[Tuple[str, str]] = None):
  191. """
  192. Determine tag relational data
  193. Optionally pass tags array for existing known tags to generate
  194. """
  195. if tags is None:
  196. tags = []
  197. for Processor in TAG_PROCESSORS:
  198. processor = Processor()
  199. value = processor.get_tag_values(data)
  200. if value:
  201. tags.append((processor.tag, value))
  202. if data.get("tags"):
  203. tags += [(k, v) for k, v in data["tags"].items()]
  204. return tags
  205. def annotate_contexts(self, event):
  206. """
  207. SDK events may contain contexts. This function adds additional contexts data
  208. """
  209. contexts = event.get("contexts")
  210. for Processor in EVENT_CONTEXT_PROCESSORS:
  211. processor = Processor()
  212. if contexts is None or not contexts.get(processor.name):
  213. processor_contexts = processor.get_context(event)
  214. if processor_contexts:
  215. if contexts is None:
  216. contexts = {}
  217. contexts[processor.name] = processor_contexts
  218. return contexts
  219. def get_message(self, data):
  220. """Prefer message over logentry"""
  221. if "message" in data:
  222. if isinstance(data["message"], dict):
  223. return data["message"].get("formatted") or data["message"].get(
  224. "message", ""
  225. )
  226. return data["message"]
  227. return data.get("logentry", {}).get("message", "")
  228. def get_logentry(self, data):
  229. if "logentry" in data:
  230. return data.get("logentry")
  231. elif "message" in data:
  232. message = data["message"]
  233. if isinstance(message, dict):
  234. return message
  235. return {"formatted": message}
  236. def is_url(self, filename: str) -> bool:
  237. return filename.startswith(("file:", "http:", "https:", "applewebdata:"))
  238. def normalize_stacktrace(self, stacktrace):
  239. """
  240. Port of semaphore store/normalize/stacktrace.rs
  241. """
  242. if not stacktrace:
  243. return
  244. for frame in stacktrace.get("frames", []):
  245. if not frame.get("abs_path") and frame.get("filename"):
  246. frame["abs_path"] = frame["filename"]
  247. if frame.get("filename") and self.is_url(frame["filename"]):
  248. frame["filename"] = urlparse(frame["filename"]).path
  249. def create(self, validated_data):
  250. data = validated_data
  251. project = self.context.get("project")
  252. eventtype = self.get_eventtype()
  253. metadata = eventtype.get_metadata(data)
  254. exception = data.get("exception")
  255. if (
  256. data.get("stacktrace")
  257. and exception
  258. and len(exception.get("values", 0)) > 0
  259. and not exception["values"][0].get("stacktrace")
  260. ):
  261. # stacktrace is deprecated, but supported at this time
  262. # Assume it's for the first exception value
  263. exception["values"][0]["stacktrace"] = data.get("stacktrace")
  264. exception = self.modify_exception(exception)
  265. if isinstance(exception, dict):
  266. for value in exception.get("values", []):
  267. self.normalize_stacktrace(value.get("stacktrace"))
  268. if release := data.get("release"):
  269. release = self.get_release(release, project)
  270. for Processor in EVENT_PROCESSORS:
  271. Processor(project, release, data).run()
  272. title = eventtype.get_title(metadata)
  273. culprit = eventtype.get_location(data)
  274. request = data.get("request")
  275. breadcrumbs = data.get("breadcrumbs")
  276. level = None
  277. if data.get("level"):
  278. level = LogLevel.from_string(data["level"])
  279. if request:
  280. headers = request.get("headers")
  281. if headers:
  282. request["inferred_content_type"] = headers.get("Content-Type")
  283. sorted_headers = sorted([pair for pair in headers.items()])
  284. for idx, header in enumerate(sorted_headers):
  285. if isinstance(header[1], list):
  286. sorted_headers[idx] = (header[0], header[1][0])
  287. request["headers"] = sorted_headers
  288. contexts = self.annotate_contexts(data)
  289. data["contexts"] = contexts
  290. with transaction.atomic():
  291. if not project.first_event:
  292. project.first_event = data.get("timestamp")
  293. project.save(update_fields=["first_event"])
  294. defaults = {
  295. "metadata": sanitize_bad_postgres_json(metadata),
  296. }
  297. if level:
  298. defaults["level"] = level
  299. if environment := data.get("environment"):
  300. environment = self.get_environment(data["environment"], project)
  301. tags = []
  302. if environment:
  303. tags.append(("environment", environment.name))
  304. if release:
  305. tags.append(("release", release.version))
  306. else:
  307. release = None # Anything falsey should be None
  308. tags = self.generate_tags(data, tags)
  309. defaults["tags"] = {tag[0]: [tag[1]] for tag in tags}
  310. issue, _ = Issue.objects.get_or_create(
  311. title=sanitize_bad_postgres_chars(title),
  312. culprit=sanitize_bad_postgres_chars(culprit),
  313. project_id=project.id,
  314. type=self.type,
  315. defaults=defaults,
  316. )
  317. json_data = {
  318. "breadcrumbs": breadcrumbs,
  319. "contexts": contexts,
  320. "culprit": culprit,
  321. "exception": exception,
  322. "logentry": self.get_logentry(data),
  323. "metadata": metadata,
  324. "message": self.get_message(data),
  325. "modules": data.get("modules"),
  326. "platform": data.get("platform", "other"),
  327. "request": request,
  328. "sdk": data.get("sdk"),
  329. "title": title,
  330. "type": self.type.label,
  331. }
  332. if environment:
  333. json_data["environment"] = environment.name
  334. if data.get("logentry"):
  335. json_data["logentry"] = data.get("logentry")
  336. extra = data.get("extra")
  337. if extra:
  338. json_data["extra"] = extra
  339. user = self.process_user(project, data)
  340. if user:
  341. json_data["user"] = user
  342. errors = None
  343. handled_errors = self.context.get("handled_errors")
  344. if handled_errors:
  345. errors = []
  346. for field_name, field_errors in handled_errors.items():
  347. for error in field_errors:
  348. errors.append(
  349. {
  350. "reason": str(error),
  351. "type": error.code,
  352. "name": field_name,
  353. "value": error.value,
  354. }
  355. )
  356. params = {
  357. "event_id": data["event_id"],
  358. "issue": issue,
  359. "tags": {tag[0]: tag[1] for tag in tags},
  360. "errors": errors,
  361. "timestamp": data.get("timestamp"),
  362. "data": sanitize_bad_postgres_json(json_data),
  363. "release": release,
  364. }
  365. if level:
  366. params["level"] = level
  367. try:
  368. event = Event.objects.create(**params)
  369. except IntegrityError as err:
  370. # This except is more efficient than a query for exists().
  371. if err.args and "event_id" in err.args[0]:
  372. raise PermissionDenied(
  373. "An event with the same ID already exists (%s)"
  374. % params["event_id"]
  375. ) from err
  376. raise err
  377. issue.check_for_status_update()
  378. # Expire after 1 hour - in case of major backup
  379. update_search_index_issue(args=[issue.pk], countdown=10, expires=3600)
  380. return event
  381. class StoreErrorSerializer(StoreDefaultSerializer):
  382. """Primary difference is the presense of exception attribute"""
  383. type = EventType.ERROR
  384. exception = serializers.JSONField(required=False)
  385. stacktrace = serializers.JSONField(
  386. required=False, help_text="Deprecated but supported at this time"
  387. )
  388. class StoreCSPReportSerializer(BaseSerializer):
  389. """
  390. CSP Report Serializer
  391. Very different format from others Store serializers.
  392. Does not extend base class due to differences.
  393. """
  394. type = EventType.CSP
  395. def __init__(self, *args, **kwargs):
  396. super().__init__(*args, **kwargs)
  397. # This is done to support the hyphen
  398. self.fields.update({"csp-report": serializers.JSONField()})
  399. def create(self, validated_data):
  400. project = self.context.get("project")
  401. csp = validated_data["csp-report"]
  402. title = self.get_title(csp)
  403. culprit = self.get_culprit(csp)
  404. uri = self.get_uri(csp)
  405. directive = self.get_effective_directive(csp)
  406. metadata = {
  407. "message": title,
  408. "uri": uri,
  409. "directive": directive,
  410. }
  411. issue, _ = Issue.objects.get_or_create(
  412. title=title,
  413. culprit=culprit,
  414. project_id=project.id,
  415. type=EventType.CSP,
  416. defaults={"metadata": metadata},
  417. )
  418. # Convert - to _
  419. normalized_csp = dict((k.replace("-", "_"), v) for k, v in csp.items())
  420. if "effective_directive" not in normalized_csp:
  421. normalized_csp["effective_directive"] = directive
  422. json_data = {
  423. "culprit": culprit,
  424. "csp": normalized_csp,
  425. "title": title,
  426. "metadata": metadata,
  427. "message": title,
  428. "type": EventType.CSP.label,
  429. }
  430. user = self.process_user(project, validated_data)
  431. if user:
  432. json_data["user"] = user
  433. params = {
  434. "issue": issue,
  435. "data": json_data,
  436. }
  437. return Event.objects.create(**params)
  438. def get_effective_directive(self, data):
  439. """
  440. Some browers return effective-directive and others don't.
  441. Infer missing ones from violated directive
  442. """
  443. if "effective-directive" in data:
  444. return data["effective-directive"]
  445. first_violation = data["violated-directive"].split()[0]
  446. return first_violation
  447. def get_uri(self, data):
  448. url = data["blocked-uri"]
  449. return urlparse(url).netloc
  450. def get_title(self, data):
  451. effective_directive = self.get_effective_directive(data)
  452. humanized_directive = effective_directive.replace("-src", "")
  453. uri = self.get_uri(data)
  454. return f"Blocked '{humanized_directive}' from '{uri}'"
  455. def get_culprit(self, data):
  456. # "style-src cdn.example.com"
  457. return data.get("violated-directive")
  458. class EnvelopeHeaderSerializer(serializers.Serializer):
  459. event_id = serializers.UUIDField(required=False)
  460. sent_at = FlexibleDateTimeField(required=False)