serializers.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575
  1. import uuid
  2. from typing import Dict, List, Tuple, Union
  3. from urllib.parse import urlparse
  4. from anonymizeip import anonymize_ip
  5. from django.db import transaction
  6. from django.db.models.expressions import OuterRef, RawSQL
  7. from django.db.utils import IntegrityError
  8. from ipware import get_client_ip
  9. from rest_framework import serializers
  10. from rest_framework.exceptions import PermissionDenied
  11. from environments.models import Environment
  12. from glitchtip.serializers import FlexibleDateTimeField
  13. from issues.models import EventType, Issue
  14. from issues.serializers import BaseBreadcrumbsSerializer
  15. from issues.tasks import update_search_index_issue
  16. from observability.metrics import events_counter, issues_counter
  17. from releases.models import Release
  18. from sentry.eventtypes.base import DefaultEvent
  19. from sentry.eventtypes.error import ErrorEvent
  20. from .event_context_processors import EVENT_CONTEXT_PROCESSORS
  21. from .event_processors import EVENT_PROCESSORS
  22. from .event_tag_processors import TAG_PROCESSORS
  23. from .fields import (
  24. ForgivingDisallowRegexField,
  25. ForgivingHStoreField,
  26. GenericField,
  27. QueryStringField,
  28. )
  29. from .models import Event, LogLevel
  30. from .utils import generate_hash
  def replace(data: Union[str, dict, list], match: str, repl: str):
      """Recursively substitute ``match`` with ``repl`` in every string of ``data``.

      Strings are rewritten with ``str.replace``; lists are rebuilt element by
      element; dicts are rebuilt with their keys untouched and their values
      processed. Any other value is handed back unchanged.
      """
      if isinstance(data, str):
          return data.replace(match, repl)
      if isinstance(data, list):
          return [replace(item, match, repl) for item in data]
      if isinstance(data, dict):
          return {key: replace(val, match, repl) for key, val in data.items()}
      return data
  def sanitize_bad_postgres_chars(data: Union[str, dict, list]):
      """
      Remove values which are not supported by the postgres string data types

      Every NUL character in a string is swapped for a single space. Dicts
      (values only) and lists are walked recursively so that the function
      honours its own type annotation. Any other value, including ``None``,
      is returned unchanged instead of raising ``AttributeError``.
      """
      known_bads = ["\x00"]

      def _clean(value):
          if isinstance(value, str):
              for known_bad in known_bads:
                  value = value.replace(known_bad, " ")
              return value
          if isinstance(value, dict):
              return {key: _clean(item) for key, item in value.items()}
          if isinstance(value, list):
              return [_clean(item) for item in value]
          # Nothing to sanitize in non-text scalars (None, numbers, ...)
          return value

      return _clean(data)
  def sanitize_bad_postgres_json(data: Union[str, dict, list]):
      """
      Strip out values that the postgres JSONB data type cannot store

      Each unsupported character is swapped for a single space, using the
      recursive ``replace`` helper so nested containers are covered too.
      """
      unsupported = ("\u0000",)
      cleaned = data
      for bad_char in unsupported:
          cleaned = replace(cleaned, bad_char, " ")
      return cleaned
  class RequestSerializer(serializers.Serializer):
      """Validates the optional ``request`` section of an SDK event."""

      env = serializers.DictField(
          child=serializers.CharField(allow_blank=True, allow_null=True),
          required=False,
      )
      # Dict values can be both str and List[str]
      headers = serializers.DictField(required=False)
      url = serializers.CharField(required=False, allow_blank=True)
      method = serializers.CharField(required=False, allow_blank=True)
      query_string = QueryStringField(required=False, allow_null=True)
  class BreadcrumbsSerializer(BaseBreadcrumbsSerializer):
      """Breadcrumb serializer that accepts any timestamp representation."""

      timestamp = GenericField(required=False)

      def validate_level(self, value):
          """Map the ``"log"`` level onto ``"info"``; pass other levels through."""
          return "info" if value == "log" else value
  class BaseSerializer(serializers.Serializer):
      """Base class providing user/IP extraction shared by the store serializers."""

      def process_user(self, project, data):
          """Fetch user data from SDK event and request

          Reads ``data["user"]`` (defaulting to an empty dict) and, when the
          serializer context carries a ``request`` with a routable client IP,
          stores that IP under ``"ip_address"``. The IP is anonymized first when
          ``project.should_scrub_ip_addresses`` is set.

          Returns the user data, or ``None`` when there is no request in the
          context or when there is neither user data nor a routable IP.

          NOTE(review): nesting was reconstructed from a flattened listing; the
          return-None-without-request behaviour is preserved as reconstructed —
          confirm against the canonical file.
          """
          user = data.get("user", {})
          request = self.context.get("request") if self.context else None
          if not request:
              return None

          client_ip, is_routable = get_client_ip(request)
          if not (user or is_routable):
              return None

          if is_routable:
              # "user" is free-form JSON from the client: an empty non-dict
              # value (None, [], "") is replaced so the IP can still be stored.
              if not user and not isinstance(user, dict):
                  user = {}
              # Item assignment is only valid on dicts; a non-empty list/str
              # payload is returned as sent rather than raising TypeError.
              if isinstance(user, dict):
                  if project.should_scrub_ip_addresses:
                      client_ip = anonymize_ip(client_ip)
                  user["ip_address"] = client_ip
          return user
  class SentrySDKEventSerializer(BaseSerializer):
      """Represents events coming from a OSS sentry SDK client"""

      breadcrumbs = serializers.JSONField(required=False)
      fingerprint = serializers.ListField(child=serializers.CharField(), required=False)
      tags = ForgivingHStoreField(required=False)
      event_id = serializers.UUIDField(required=False, default=uuid.uuid4)
      extra = serializers.JSONField(required=False)
      request = RequestSerializer(required=False)
      server_name = serializers.CharField(required=False)
      sdk = serializers.JSONField(required=False)
      platform = serializers.CharField(required=False)
      release = serializers.CharField(required=False, allow_null=True, allow_blank=True)
      environment = ForgivingDisallowRegexField(
          required=False, allow_null=True, disallow_regex=r"^[^\n\r\f\/]*$"
      )
      _meta = serializers.JSONField(required=False)

      def set_environment(self, name: str, project) -> str:
          """
          Set project.environment_id if not already so
          Create the needed Environment if necessary

          Returns the stored environment name (the given name truncated to the
          model field's max length) when an environment was attached, otherwise
          the name exactly as given.
          """
          if project.environment_id or not name:
              return name
          max_length = Environment._meta.get_field("name").max_length
          environment, _ = Environment.objects.get_or_create(
              name=name[:max_length],
              organization=project.organization,
          )
          environment.projects.add(project)
          project.environment_id = environment.id
          return environment.name

      def set_release(self, version: str, project) -> str:
          """
          Set project.release_id if not already so
          Create needed Release if necessary

          Returns the release's version when one was attached, otherwise the
          version exactly as given.
          """
          if project.release_id or not version:
              return version
          release, _ = Release.objects.get_or_create(
              version=version, organization=project.organization
          )
          release.projects.add(project)
          project.release_id = release.id
          return release.version
  class FormattedMessageSerializer(serializers.Serializer):
      """Validates a structured message (``formatted`` / ``message`` / ``params``)."""

      formatted = serializers.CharField(
          required=False
      )  # Documented as required, but some Sentry SDKs don't send it
      message = serializers.CharField(required=False)
      params = serializers.JSONField(required=False)

      def validate(self, attrs):
          """Derive ``formatted`` from ``message`` and ``params`` when it is missing.

          List params are applied with printf-style ``%`` formatting and dict
          params with ``str.format``. The template and params come from the
          client, so formatting is best-effort: a missing ``message`` or a
          template that does not match its params leaves ``formatted`` unset
          instead of raising.
          """
          data = super().validate(attrs)
          if not data.get("formatted") and data.get("params"):
              params = data["params"]
              message = data.get("message")
              if message is not None:
                  try:
                      if isinstance(params, list):
                          data["formatted"] = message % tuple(params)
                      elif isinstance(params, dict):
                          data["formatted"] = message.format(**params)
                  except (
                      TypeError,
                      ValueError,
                      KeyError,
                      IndexError,
                      AttributeError,
                  ):
                      # Placeholder/argument mismatch in client-supplied data;
                      # keep the raw message rather than failing the event.
                      pass
              return data
          # OSS Sentry only keeps unformatted "message" when it creates a formatted message
          return {key: data[key] for key in data if key != "message"}
  class MessageField(serializers.CharField):
      """Char field that also accepts a structured (dict) message."""

      def to_internal_value(self, data):
          """Validate dict input via FormattedMessageSerializer, else as a string."""
          if not isinstance(data, dict):
              return super().to_internal_value(data)
          nested = FormattedMessageSerializer(data=data)
          nested.is_valid(raise_exception=True)
          return nested.validated_data
  class LogEntrySerializer(serializers.Serializer):
      """Validates the ``logentry`` section of an SDK event."""

      formatted = serializers.CharField(required=False)
      message = serializers.CharField(required=False, allow_blank=True)
      params = serializers.JSONField(required=False)

      def validate(self, attrs):
          """Derive ``formatted`` from ``message`` and ``params`` when it is missing.

          List params are applied with printf-style ``%`` formatting and dict
          params with ``str.format``. The template and params come from the
          client, so formatting is best-effort: a missing ``message`` or a
          template that does not match its params leaves ``formatted`` unset
          instead of raising.
          """
          data = super().validate(attrs)
          if not data.get("formatted") and data.get("params"):
              params = data["params"]
              message = data.get("message")
              if message is not None:
                  try:
                      if isinstance(params, list):
                          data["formatted"] = message % tuple(params)
                      elif isinstance(params, dict):
                          data["formatted"] = message.format(**params)
                  except (
                      TypeError,
                      ValueError,
                      KeyError,
                      IndexError,
                      AttributeError,
                  ):
                      # Placeholder/argument mismatch in client-supplied data;
                      # keep the raw message rather than failing the event.
                      pass
          return data
  class StoreDefaultSerializer(SentrySDKEventSerializer):
      """
      Default serializer. Used as both a base class and for default error types

      NOTE(review): this class was reconstructed from a listing whose
      indentation had been flattened. Confirm block nesting, in particular the
      extent of the ``transaction.atomic()`` sections in ``create``, against
      the canonical file.
      """

      type = EventType.DEFAULT
      contexts = serializers.JSONField(required=False)
      level = serializers.CharField(required=False)
      logentry = LogEntrySerializer(required=False)
      message = MessageField(required=False, allow_blank=True, allow_null=True)
      timestamp = FlexibleDateTimeField(required=False)
      transaction = serializers.CharField(
          required=False, allow_null=True, allow_blank=True
      )
      user = serializers.JSONField(required=False)
      modules = serializers.JSONField(required=False)

      def validate_breadcrumbs(self, value):
          """
          Normalize breadcrumbs, which may come in as dict or list

          Returns ``None`` for an empty breadcrumb list, the validated
          ``{"values": [...]}`` structure when the entries validate, and the
          input unchanged otherwise.
          """
          if isinstance(value, list):
              value = {"values": value}
          if not isinstance(value, dict):
              # Neither supported shape: keep the lenient "return as received"
              # behaviour instead of failing on ``.get``.
              return value
          if value.get("values") == []:
              return None
          serializer = BreadcrumbsSerializer(data=value.get("values"), many=True)
          if serializer.is_valid():
              return {"values": serializer.validated_data}
          return value

      def get_eventtype(self):
          """Get event type class from self.type"""
          if self.type is EventType.DEFAULT:
              return DefaultEvent()
          if self.type is EventType.ERROR:
              return ErrorEvent()
          return None

      def modify_exception(self, exception):
          """OSS Sentry does this, I have no idea why"""
          # Only dict payloads are walked; anything else is returned as received.
          if exception and isinstance(exception, dict):
              for value in exception.get("values", []):
                  value.pop("module", None)
                  if value.get("stacktrace") and value["stacktrace"].get("frames"):
                      frames = value["stacktrace"]["frames"]
                      # If in_app is always true, make it false ¯\_(ツ)_/¯
                      if all(x.get("in_app") for x in frames):
                          for frame in frames:
                              frame["in_app"] = False
          return exception

      def generate_tags(self, data: Dict, tags: List[Tuple[str, str]] = None):
          """
          Determine tag relational data
          Optionally pass tags array for existing known tags to generate

          The passed list (when given) is extended in place and returned.
          """
          if tags is None:
              tags = []
          for Processor in TAG_PROCESSORS:
              processor = Processor()
              value = processor.get_tag_values(data)
              if value:
                  tags.append((processor.tag, value))
          if data.get("tags"):
              tags += [(k, v) for k, v in data["tags"].items()]
          return tags

      def annotate_contexts(self, event):
          """
          SDK events may contain contexts. This function adds additional contexts data

          A processor only contributes when the event does not already carry a
          non-empty context under that processor's name.
          """
          contexts = event.get("contexts")
          for Processor in EVENT_CONTEXT_PROCESSORS:
              processor = Processor()
              if contexts is None or not contexts.get(processor.name):
                  processor_contexts = processor.get_context(event)
                  if processor_contexts:
                      if contexts is None:
                          contexts = {}
                      contexts[processor.name] = processor_contexts
          return contexts

      def get_message(self, data):
          """Prefer message over logentry"""
          if "message" in data:
              if isinstance(data["message"], dict):
                  return data["message"].get("formatted") or data["message"].get(
                      "message", ""
                  )
              return data["message"]
          return data.get("logentry", {}).get("message", "")

      def get_logentry(self, data):
          """Return the logentry, falling back to one built from ``message``."""
          if "logentry" in data:
              return data.get("logentry")
          elif "message" in data:
              message = data["message"]
              if isinstance(message, dict):
                  return message
              return {"formatted": message}
          return None

      def is_url(self, filename: str) -> bool:
          """Tell whether ``filename`` starts with one of the known URL schemes."""
          return filename.startswith(("file:", "http:", "https:", "applewebdata:"))

      def normalize_stacktrace(self, stacktrace):
          """
          Port of semaphore store/normalize/stacktrace.rs

          Mutates the frames in place: fills a missing ``abs_path`` from
          ``filename`` and reduces URL filenames to their path component.
          """
          if not stacktrace:
              return
          for frame in stacktrace.get("frames", []):
              if not frame.get("abs_path") and frame.get("filename"):
                  frame["abs_path"] = frame["filename"]
              if frame.get("filename") and self.is_url(frame["filename"]):
                  frame["filename"] = urlparse(frame["filename"]).path

      def _prepare_exception(self, data):
          """Merge the deprecated top-level stacktrace and normalize the exception."""
          exception = data.get("exception")
          stacktrace = data.get("stacktrace")
          if stacktrace and isinstance(exception, dict):
              # "values" may be missing entirely; never call len() on a default.
              values = exception.get("values")
              if (
                  isinstance(values, list)
                  and values
                  and not values[0].get("stacktrace")
              ):
                  # stacktrace is deprecated, but supported at this time
                  # Assume it's for the first exception value
                  values[0]["stacktrace"] = stacktrace
          exception = self.modify_exception(exception)
          if isinstance(exception, dict):
              for value in exception.get("values", []):
                  self.normalize_stacktrace(value.get("stacktrace"))
          return exception

      @staticmethod
      def _normalize_request_headers(request):
          """Replace the request's header dict with a sorted list of pairs, in place."""
          headers = request.get("headers")
          if not headers:
              return
          request["inferred_content_type"] = headers.get("Content-Type")
          sorted_headers = sorted(headers.items())
          for idx, (name, value) in enumerate(sorted_headers):
              if isinstance(value, list):
                  # Keep only the first value; an empty list has none to keep.
                  sorted_headers[idx] = (name, value[0] if value else "")
          request["headers"] = sorted_headers

      def _get_or_create_issue(self, project, issue_hash, defaults):
          """Return ``(issue, created)`` for the issue owning ``issue_hash``."""
          # Similar to get_or_create but with multiple tables
          try:
              issue = Issue.objects.get(
                  project_id=project.id,
                  issuehash__value=issue_hash,
              )
              return issue, False
          except Issue.DoesNotExist:
              pass
          try:
              # The IntegrityError must be caught OUTSIDE the atomic block so the
              # savepoint is rolled back (dropping the half-created issue) before
              # any further query runs on this connection.
              with transaction.atomic():
                  issue = Issue.objects.create(project_id=project.id, **defaults)
                  issue.issuehash_set.create(value=issue_hash, project=project)
          except IntegrityError:
              # Most likely a concurrent request created the same hash first.
              existing = Issue.objects.filter(
                  project_id=project.id,
                  issuehash__value=issue_hash,
              ).first()
              if existing is None:
                  raise
              return existing, False
          return issue, True

      def _collect_handled_errors(self):
          """Flatten ``handled_errors`` from the context, or ``None`` if absent."""
          handled_errors = self.context.get("handled_errors")
          if not handled_errors:
              return None
          return [
              {
                  "reason": str(error),
                  "type": error.code,
                  "name": field_name,
                  "value": error.value,
              }
              for field_name, field_errors in handled_errors.items()
              for error in field_errors
          ]

      def create(self, validated_data):
          """Store an event, creating or reusing the issue it belongs to.

          Expects ``project`` in the serializer context. Raises
          ``PermissionDenied`` when an event with the same ``event_id`` already
          exists. Returns the created ``Event``.
          """
          data = validated_data
          project = self.context.get("project")
          eventtype = self.get_eventtype()
          metadata = eventtype.get_metadata(data)
          exception = self._prepare_exception(data)

          tags = []
          release = self.set_release(data.get("release"), project)
          if project.release_id:
              tags.append(("release", release))
          environment = self.set_environment(data.get("environment"), project)
          if project.environment_id:
              tags.append(("environment", environment))
          if data.get("server_name"):
              tags.append(("server_name", data.get("server_name")))

          for Processor in EVENT_PROCESSORS:
              Processor(project, project.release_id, data).run()

          title = eventtype.get_title(metadata)
          culprit = eventtype.get_location(data)
          issue_hash = generate_hash(title, culprit, self.type, data.get("fingerprint"))
          request = data.get("request")
          breadcrumbs = data.get("breadcrumbs")
          level = None
          if data.get("level"):
              level = LogLevel.from_string(data["level"])
          if request:
              self._normalize_request_headers(request)
          contexts = self.annotate_contexts(data)
          data["contexts"] = contexts

          with transaction.atomic():
              if not project.first_event:
                  project.first_event = data.get("timestamp")
                  project.save(update_fields=["first_event"])
              defaults = {
                  "metadata": sanitize_bad_postgres_json(metadata),
                  "title": sanitize_bad_postgres_chars(title),
                  "culprit": sanitize_bad_postgres_chars(culprit),
                  "type": self.type,
              }
              if level:
                  defaults["level"] = level
              tags = self.generate_tags(data, tags)
              defaults["tags"] = {tag[0]: [tag[1]] for tag in tags}

              issue, issue_created = self._get_or_create_issue(
                  project, issue_hash, defaults
              )

              json_data = {
                  "breadcrumbs": breadcrumbs,
                  "contexts": contexts,
                  "culprit": culprit,
                  "exception": exception,
                  "logentry": self.get_logentry(data),
                  "metadata": metadata,
                  "message": self.get_message(data),
                  "modules": data.get("modules"),
                  "platform": data.get("platform", "other"),
                  "request": request,
                  "sdk": data.get("sdk"),
                  "title": title,
                  "type": self.type.label,
              }
              if project.environment_id:
                  json_data["environment"] = data.get("environment")
              if data.get("logentry"):
                  json_data["logentry"] = data.get("logentry")
              extra = data.get("extra")
              if extra:
                  json_data["extra"] = extra
              user = self.process_user(project, data)
              if user:
                  json_data["user"] = user

              params = {
                  "event_id": data["event_id"],
                  "issue": issue,
                  "tags": {tag[0]: tag[1] for tag in tags},
                  "errors": self._collect_handled_errors(),
                  "timestamp": data.get("timestamp"),
                  "data": sanitize_bad_postgres_json(json_data),
                  "release_id": project.release_id,
              }
              if level:
                  params["level"] = level
              events_counter.labels(
                  project.slug, project.organization.slug, issue.title
              ).inc()
              try:
                  event = Event.objects.create(**params)
              except IntegrityError as err:
                  # This except is more efficient than a query for exists().
                  if err.args and "event_id" in err.args[0]:
                      raise PermissionDenied(
                          "An event with the same ID already exists (%s)"
                          % params["event_id"]
                      ) from err
                  raise

          if issue_created:  # Do it right now, so that new issues look correct
              issues_counter.labels(project.slug, project.organization.slug).inc()
              event_data = Event.objects.filter(issue_id=OuterRef("id")).values("data")[
                  :1
              ]
              event_vector = event_data.annotate(
                  search_vector=RawSQL("select generate_issue_tsvector(data)", [])
              ).values("search_vector")
              Issue.objects.filter(pk=issue.pk).update(
                  search_vector=event_vector, last_seen=event.created
              )
          else:  # Updates can be slower and debounced
              issue.check_for_status_update()
              # Expire after 1 hour - in case of major backup
              update_search_index_issue(args=[issue.pk])
          return event
  class StoreErrorSerializer(StoreDefaultSerializer):
      """Primary difference is the presence of the exception attribute"""

      type = EventType.ERROR
      exception = serializers.JSONField(required=False)
      stacktrace = serializers.JSONField(
          required=False,
          help_text="Deprecated but supported at this time",
      )
  class StoreCSPReportSerializer(BaseSerializer):
      """
      CSP Report Serializer
      Very different format from other Store serializers.
      Does not extend base class due to differences.
      """

      type = EventType.CSP

      def __init__(self, *args, **kwargs):
          super().__init__(*args, **kwargs)
          # The field name contains a hyphen, so it cannot be declared as a
          # class attribute and is registered here instead
          self.fields.update({"csp-report": serializers.JSONField()})

      def create(self, validated_data):
          """Store a CSP report as an event on a get-or-created CSP issue."""
          project = self.context.get("project")
          report = validated_data["csp-report"]
          title = self.get_title(report)
          culprit = self.get_culprit(report)
          uri = self.get_uri(report)
          directive = self.get_effective_directive(report)
          metadata = {"message": title, "uri": uri, "directive": directive}
          issue, _ = Issue.objects.get_or_create(
              title=title,
              culprit=culprit,
              project_id=project.id,
              type=EventType.CSP,
              defaults={"metadata": metadata},
          )
          # Convert - to _
          normalized_csp = {
              key.replace("-", "_"): value for key, value in report.items()
          }
          if "effective_directive" not in normalized_csp:
              normalized_csp["effective_directive"] = directive
          json_data = {
              "culprit": culprit,
              "csp": normalized_csp,
              "title": title,
              "metadata": metadata,
              "message": title,
              "type": EventType.CSP.label,
          }
          user = self.process_user(project, validated_data)
          if user:
              json_data["user"] = user
          return Event.objects.create(issue=issue, data=json_data)

      def get_effective_directive(self, data):
          """
          Some browsers return effective-directive and others don't.
          Infer missing ones from violated directive
          """
          if "effective-directive" not in data:
              # First whitespace-separated token of the violated directive
              return data["violated-directive"].split()[0]
          return data["effective-directive"]

      def get_uri(self, data):
          """Return the network location part of the report's blocked URI."""
          blocked = urlparse(data["blocked-uri"])
          return blocked.netloc

      def get_title(self, data):
          """Build the issue title from the effective directive and blocked host."""
          directive = self.get_effective_directive(data).replace("-src", "")
          host = self.get_uri(data)
          return f"Blocked '{directive}' from '{host}'"

      def get_culprit(self, data):
          """Return the raw violated directive, e.g. "style-src cdn.example.com"."""
          return data.get("violated-directive")
  500. class EnvelopeHeaderSerializer(serializers.Serializer):
  501. event_id = serializers.UUIDField(required=False)
  502. sent_at = FlexibleDateTimeField(required=False)