schema.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413
  1. import logging
  2. import typing
  3. import uuid
  4. from datetime import datetime
  5. from typing import Annotated, Any, Literal, Optional, Union
  6. from urllib.parse import parse_qs, urlparse
  7. from django.utils.timezone import now
  8. from ninja import Field
  9. from pydantic import (
  10. AliasChoices,
  11. BeforeValidator,
  12. RootModel,
  13. ValidationError,
  14. WrapValidator,
  15. field_validator,
  16. model_validator,
  17. )
  18. from apps.issue_events.constants import IssueEventType
  19. from ..shared.schema.base import LaxIngestSchema
  20. from ..shared.schema.contexts import ContextsSchema
  21. from ..shared.schema.event import (
  22. BaseIssueEvent,
  23. BaseRequest,
  24. EventBreadcrumb,
  25. ListKeyValue,
  26. )
  27. from ..shared.schema.user import EventUser
  28. from ..shared.schema.utils import invalid_to_none
  29. logger = logging.getLogger(__name__)
  30. CoercedStr = Annotated[
  31. str, BeforeValidator(lambda v: str(v) if isinstance(v, bool) else v)
  32. ]
  33. """
  34. Coerced Str that will coerce bool to str when found
  35. """
  36. def coerce_list(v: Any) -> Any:
  37. """Wrap non-list dict into list: {"a": 1} to [{"a": 1}]"""
  38. return v if not isinstance(v, dict) else [v]
  39. class Signal(LaxIngestSchema):
  40. number: int
  41. code: Optional[int]
  42. name: Optional[str]
  43. code_name: Optional[str]
  44. class MachException(LaxIngestSchema):
  45. number: int
  46. code: int
  47. subcode: int
  48. name: Optional[str]
  49. class NSError(LaxIngestSchema):
  50. code: int
  51. domain: str
  52. class Errno(LaxIngestSchema):
  53. number: int
  54. name: Optional[str]
  55. class MechanismMeta(LaxIngestSchema):
  56. signal: Optional[Signal] = None
  57. match_exception: Optional[MachException] = None
  58. ns_error: Optional[NSError] = None
  59. errno: Optional[Errno] = None
  60. class ExceptionMechanism(LaxIngestSchema):
  61. type: str
  62. description: Optional[str] = None
  63. help_link: Optional[str] = None
  64. handled: Optional[bool] = None
  65. synthetic: Optional[bool] = None
  66. meta: Optional[dict] = None
  67. data: Optional[dict] = None
  68. class StackTraceFrame(LaxIngestSchema):
  69. filename: Optional[str] = None
  70. function: Optional[str] = None
  71. raw_function: Optional[str] = None
  72. module: Optional[str] = None
  73. lineno: Optional[int] = None
  74. colno: Optional[int] = None
  75. abs_path: Optional[str] = None
  76. context_line: Optional[str] = None
  77. pre_context: Optional[list[Optional[str]]] = None
  78. post_context: Optional[list[Optional[str]]] = None
  79. source_link: Optional[str] = None
  80. in_app: Optional[bool] = None
  81. stack_start: Optional[bool] = None
  82. vars: Optional[dict[str, Union[str, dict, list]]] = None
  83. instruction_addr: Optional[str] = None
  84. addr_mode: Optional[str] = None
  85. symbol_addr: Optional[str] = None
  86. image_addr: Optional[str] = None
  87. package: Optional[str] = None
  88. platform: Optional[str] = None
  89. def is_url(self, filename: str) -> bool:
  90. return filename.startswith(("file:", "http:", "https:", "applewebdata:"))
  91. @model_validator(mode="after")
  92. def normalize_files(self):
  93. if not self.abs_path and self.filename:
  94. self.abs_path = self.filename
  95. if self.filename and self.is_url(self.filename):
  96. self.filename = urlparse(self.filename).path
  97. return self
  98. @field_validator("pre_context", "post_context")
  99. @classmethod
  100. def replace_null(cls, context: list[Optional[str]]) -> list[Optional[str]]:
  101. if context:
  102. return [line if line else "" for line in context]
  103. class StackTrace(LaxIngestSchema):
  104. frames: list[StackTraceFrame]
  105. registers: Optional[dict[str, str]] = None
  106. class EventException(LaxIngestSchema):
  107. type: Optional[str] = None
  108. value: Annotated[Optional[str], WrapValidator(invalid_to_none)] = None
  109. module: Optional[str] = None
  110. thread_id: Optional[str] = None
  111. mechanism: Optional[ExceptionMechanism] = None
  112. stacktrace: Annotated[Optional[StackTrace], WrapValidator(invalid_to_none)] = None
  113. @model_validator(mode="after")
  114. def check_type_value(self):
  115. if self.type is None and self.value is None:
  116. return None
  117. return self
  118. class ValueEventException(LaxIngestSchema):
  119. values: list[EventException]
  120. @field_validator("values")
  121. @classmethod
  122. def strip_null(cls, v: list[EventException]) -> list[EventException]:
  123. return [e for e in v if e is not None]
  124. class EventMessage(LaxIngestSchema):
  125. formatted: str = Field(max_length=8192, default="")
  126. message: Optional[str] = None
  127. params: Optional[Union[list[str], dict[str, str]]] = None
  128. @model_validator(mode="after")
  129. def set_formatted(self) -> "EventMessage":
  130. """
  131. When the EventMessage formatted string is not set,
  132. attempt to set it based on message and params interpolation
  133. """
  134. if not self.formatted and self.message:
  135. params = self.params
  136. if isinstance(params, list) and params:
  137. self.formatted = self.message % tuple(params)
  138. elif isinstance(params, dict):
  139. self.formatted = self.message.format(**params)
  140. return self
  141. class EventTemplate(LaxIngestSchema):
  142. lineno: int
  143. abs_path: Optional[str] = None
  144. filename: str
  145. context_line: str
  146. pre_context: Optional[list[str]] = None
  147. post_context: Optional[list[str]] = None
  148. class ValueEventBreadcrumb(LaxIngestSchema):
  149. values: list[EventBreadcrumb]
  150. class ClientSDKPackage(LaxIngestSchema):
  151. name: Optional[str] = None
  152. version: Optional[str] = None
  153. class ClientSDKInfo(LaxIngestSchema):
  154. integrations: Optional[list[Optional[str]]] = None
  155. name: Optional[str]
  156. packages: Optional[list[ClientSDKPackage]] = None
  157. version: Optional[str]
  158. @field_validator("packages", mode="before")
  159. def name_must_contain_space(cls, v: Any) -> Any:
  160. return coerce_list(v)
  161. class RequestHeaders(LaxIngestSchema):
  162. content_type: Optional[str]
  163. class RequestEnv(LaxIngestSchema):
  164. remote_addr: Optional[str]
  165. QueryString = Union[str, ListKeyValue, dict[str, Optional[str]]]
  166. """Raw URL querystring, list, or dict"""
  167. KeyValueFormat = Union[list[list[Optional[str]]], dict[str, Optional[CoercedStr]]]
  168. """
  169. key-values in list or dict format. Example {browser: firefox} or [[browser, firefox]]
  170. """
  171. class IngestRequest(BaseRequest):
  172. headers: Optional[KeyValueFormat] = None
  173. query_string: Optional[QueryString] = None
  174. @field_validator("headers", mode="before")
  175. @classmethod
  176. def fix_non_standard_headers(cls, v):
  177. """
  178. Fix non-documented format used by PHP Sentry Client
  179. Convert {"Foo": ["bar"]} into {"Foo: "bar"}
  180. """
  181. if isinstance(v, dict):
  182. return {
  183. key: value[0] if isinstance(value, list) else value
  184. for key, value in v.items()
  185. }
  186. return v
  187. @field_validator("query_string", "headers")
  188. @classmethod
  189. def prefer_list_key_value(
  190. cls, v: Optional[Union[QueryString, KeyValueFormat]]
  191. ) -> Optional[ListKeyValue]:
  192. """Store all querystring, header formats in a list format"""
  193. result: Optional[ListKeyValue] = None
  194. if isinstance(v, str) and v: # It must be a raw querystring, parse it
  195. qs = parse_qs(v)
  196. result = [[key, value] for key, values in qs.items() for value in values]
  197. elif isinstance(v, dict): # Convert dict to list
  198. result = [[key, value] for key, value in v.items()]
  199. elif isinstance(v, list): # Normalize list (throw out any weird data)
  200. result = [item[:2] for item in v if len(item) >= 2]
  201. if result:
  202. # Remove empty and any key called "Cookie" which could be sensitive data
  203. entry_to_remove = ["Cookie", ""]
  204. return sorted(
  205. [entry for entry in result if entry != entry_to_remove],
  206. key=lambda x: (x[0], x[1]),
  207. )
  208. return result
  209. class IngestIssueEvent(BaseIssueEvent):
  210. timestamp: datetime = Field(default_factory=now)
  211. level: Optional[str] = "error"
  212. logentry: Optional[EventMessage] = None
  213. logger: Optional[str] = None
  214. transaction: Optional[str] = Field(
  215. validation_alias=AliasChoices("transaction", "culprit"), default=None
  216. )
  217. server_name: Optional[str] = None
  218. release: Optional[str] = None
  219. dist: Optional[str] = None
  220. tags: Optional[KeyValueFormat] = None
  221. environment: Optional[str] = None
  222. modules: Optional[dict[str, Optional[str]]] = None
  223. extra: Optional[dict[str, Any]] = None
  224. fingerprint: Optional[list[str]] = None
  225. errors: Optional[list[Any]] = None
  226. exception: Optional[Union[list[EventException], ValueEventException]] = None
  227. message: Optional[Union[str, EventMessage]] = None
  228. template: Optional[EventTemplate] = None
  229. breadcrumbs: Optional[Union[list[EventBreadcrumb], ValueEventBreadcrumb]] = None
  230. sdk: Optional[ClientSDKInfo] = None
  231. request: Optional[IngestRequest] = None
  232. contexts: Optional[ContextsSchema] = None
  233. user: Optional[EventUser] = None
  234. @field_validator("tags")
  235. @classmethod
  236. def prefer_dict(
  237. cls, v: Optional[KeyValueFormat]
  238. ) -> Optional[dict[str, Optional[str]]]:
  239. if isinstance(v, list):
  240. return {key: value for key, value in v if key is not None}
  241. return v
  242. class EventIngestSchema(IngestIssueEvent):
  243. event_id: uuid.UUID
  244. class EnvelopeHeaderSchema(LaxIngestSchema):
  245. event_id: Optional[uuid.UUID] = None
  246. dsn: Optional[str] = None
  247. sdk: Optional[ClientSDKInfo] = None
  248. sent_at: datetime = Field(default_factory=now)
  249. SupportedItemType = Literal["transaction", "event"]
  250. IgnoredItemType = Literal[
  251. "session", "sessions", "client_report", "attachment", "user_report", "check_in"
  252. ]
  253. SUPPORTED_ITEMS = typing.get_args(SupportedItemType)
  254. class ItemHeaderSchema(LaxIngestSchema):
  255. content_type: Optional[str] = None
  256. type: Union[SupportedItemType, IgnoredItemType]
  257. length: Optional[int] = None
  258. class EnvelopeSchema(RootModel[list[dict[str, Any]]]):
  259. root: list[dict[str, Any]]
  260. _header: EnvelopeHeaderSchema
  261. _items: list[tuple[ItemHeaderSchema, Union[IngestIssueEvent, dict[str, Any]]]] = []
  262. @model_validator(mode="after")
  263. def validate_envelope(self) -> "EnvelopeSchema":
  264. data = self.root
  265. try:
  266. header = data.pop(0)
  267. except IndexError:
  268. raise ValidationError([{"message": "Envelope is empty"}])
  269. self._header = EnvelopeHeaderSchema(**header)
  270. while len(data) >= 2:
  271. item_header_data = data.pop(0)
  272. if item_header_data.get("type", None) not in SUPPORTED_ITEMS:
  273. continue
  274. item_header = ItemHeaderSchema(**item_header_data)
  275. if item_header.type == "event":
  276. try:
  277. item = IngestIssueEvent(**data.pop(0))
  278. except ValidationError as err:
  279. logger.warning("Envelope Event item invalid", exc_info=True)
  280. raise err
  281. self._items.append((item_header, item))
  282. elif item_header.type == "transaction":
  283. item = data.pop(0)
  284. self._items.append((item_header, item))
  285. return self
  286. class CSPReportSchema(LaxIngestSchema):
  287. """
  288. https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Security-Policy-Report-Only#violation_report_syntax
  289. """
  290. blocked_uri: str = Field(alias="blocked-uri")
  291. disposition: Literal["enforce", "report"] = Field(alias="disposition")
  292. document_uri: str = Field(alias="document-uri")
  293. effective_directive: str = Field(alias="effective-directive")
  294. original_policy: Optional[str] = Field(alias="original-policy")
  295. script_sample: Optional[str] = Field(alias="script-sample", default=None)
  296. status_code: Optional[int] = Field(alias="status-code")
  297. line_number: Optional[int] = None
  298. column_number: Optional[int] = None
  299. class SecuritySchema(LaxIngestSchema):
  300. csp_report: CSPReportSchema = Field(alias="csp-report")
  301. ## Normalized Interchange Issue Events
  302. class IssueEventSchema(IngestIssueEvent):
  303. """
  304. Event storage and interchange format
  305. Used in json view and celery interchange
  306. Don't use this for api intake
  307. """
  308. type: Literal[IssueEventType.DEFAULT] = IssueEventType.DEFAULT
  309. class ErrorIssueEventSchema(IngestIssueEvent):
  310. type: Literal[IssueEventType.ERROR] = IssueEventType.ERROR
  311. class CSPIssueEventSchema(IngestIssueEvent):
  312. type: Literal[IssueEventType.CSP] = IssueEventType.CSP
  313. csp: CSPReportSchema
  314. class InterchangeIssueEvent(LaxIngestSchema):
  315. """Normalized wrapper around issue event. Event should not contain repeat information."""
  316. event_id: uuid.UUID = Field(default_factory=uuid.uuid4)
  317. project_id: int
  318. organization_id: int
  319. received: datetime = Field(default_factory=now)
  320. payload: Union[IssueEventSchema, ErrorIssueEventSchema, CSPIssueEventSchema] = (
  321. Field(discriminator="type")
  322. )