schema.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454
  1. import logging
  2. import typing
  3. import uuid
  4. from datetime import datetime
  5. from typing import Annotated, Any, Literal, Union
  6. from urllib.parse import parse_qs, urlparse
  7. from django.utils.timezone import now
  8. from ninja import Field
  9. from pydantic import (
  10. AliasChoices,
  11. BeforeValidator,
  12. JsonValue,
  13. RootModel,
  14. ValidationError,
  15. WrapValidator,
  16. field_validator,
  17. model_validator,
  18. )
  19. from apps.issue_events.constants import IssueEventType
  20. from ..shared.schema.base import LaxIngestSchema
  21. from ..shared.schema.contexts import ContextsSchema
  22. from ..shared.schema.event import (
  23. BaseIssueEvent,
  24. BaseRequest,
  25. EventBreadcrumb,
  26. ListKeyValue,
  27. )
  28. from ..shared.schema.user import EventUser
  29. from ..shared.schema.utils import invalid_to_none
  30. logger = logging.getLogger(__name__)
  31. CoercedStr = Annotated[
  32. str, BeforeValidator(lambda v: str(v) if isinstance(v, bool) else v)
  33. ]
  34. """
  35. Coerced Str that will coerce bool to str when found
  36. """
  37. def coerce_list(v: Any) -> Any:
  38. """Wrap non-list dict into list: {"a": 1} to [{"a": 1}]"""
  39. return v if not isinstance(v, dict) else [v]
  40. class Signal(LaxIngestSchema):
  41. number: int
  42. code: int | None
  43. name: str | None
  44. code_name: str | None
  45. class MachException(LaxIngestSchema):
  46. number: int
  47. code: int
  48. subcode: int
  49. name: str | None
  50. class NSError(LaxIngestSchema):
  51. code: int
  52. domain: str
  53. class Errno(LaxIngestSchema):
  54. number: int
  55. name: str | None
  56. class MechanismMeta(LaxIngestSchema):
  57. signal: Signal | None = None
  58. match_exception: MachException | None = None
  59. ns_error: NSError | None = None
  60. errno: Errno | None = None
  61. class ExceptionMechanism(LaxIngestSchema):
  62. type: str
  63. description: str | None = None
  64. help_link: str | None = None
  65. handled: bool | None = None
  66. synthetic: bool | None = None
  67. meta: dict | None = None
  68. data: dict | None = None
  69. class StackTraceFrame(LaxIngestSchema):
  70. filename: str | None = None
  71. function: str | None = None
  72. raw_function: str | None = None
  73. module: str | None = None
  74. lineno: int | None = None
  75. colno: int | None = None
  76. abs_path: str | None = None
  77. context_line: str | None = None
  78. pre_context: list[str | None] | None = None
  79. post_context: list[str | None] | None = None
  80. source_link: str | None = None
  81. in_app: bool | None = None
  82. stack_start: bool | None = None
  83. vars: dict[str, Union[str, dict, list]] | None = None
  84. instruction_addr: str | None = None
  85. addr_mode: str | None = None
  86. symbol_addr: str | None = None
  87. image_addr: str | None = None
  88. package: str | None = None
  89. platform: str | None = None
  90. def is_url(self, filename: str) -> bool:
  91. return filename.startswith(("file:", "http:", "https:", "applewebdata:"))
  92. @model_validator(mode="after")
  93. def normalize_files(self):
  94. if not self.abs_path and self.filename:
  95. self.abs_path = self.filename
  96. if self.filename and self.is_url(self.filename):
  97. self.filename = urlparse(self.filename).path
  98. return self
  99. @field_validator("pre_context", "post_context")
  100. @classmethod
  101. def replace_null(cls, context: list[str | None]) -> list[str | None] | None:
  102. if context:
  103. return [line if line else "" for line in context]
  104. return None
  105. class StackTrace(LaxIngestSchema):
  106. frames: list[StackTraceFrame]
  107. registers: dict[str, str] | None = None
  108. class EventException(LaxIngestSchema):
  109. type: str | None = None
  110. value: Annotated[str | None, WrapValidator(invalid_to_none)] = None
  111. module: str | None = None
  112. thread_id: str | None = None
  113. mechanism: ExceptionMechanism | None = None
  114. stacktrace: Annotated[StackTrace | None, WrapValidator(invalid_to_none)] = None
  115. @model_validator(mode="after")
  116. def check_type_value(self):
  117. if self.type is None and self.value is None:
  118. return None
  119. return self
  120. class ValueEventException(LaxIngestSchema):
  121. values: list[EventException]
  122. @field_validator("values")
  123. @classmethod
  124. def strip_null(cls, v: list[EventException]) -> list[EventException]:
  125. return [e for e in v if e is not None]
  126. class EventMessage(LaxIngestSchema):
  127. formatted: str = Field(max_length=8192, default="")
  128. message: str | None = None
  129. params: Union[list[str], dict[str, str]] | None = None
  130. @model_validator(mode="after")
  131. def set_formatted(self) -> "EventMessage":
  132. """
  133. When the EventMessage formatted string is not set,
  134. attempt to set it based on message and params interpolation
  135. """
  136. if not self.formatted and self.message:
  137. params = self.params
  138. if isinstance(params, list) and params:
  139. self.formatted = self.message % tuple(params)
  140. elif isinstance(params, dict):
  141. self.formatted = self.message.format(**params)
  142. return self
  143. class EventTemplate(LaxIngestSchema):
  144. lineno: int
  145. abs_path: str | None = None
  146. filename: str
  147. context_line: str
  148. pre_context: list[str] | None = None
  149. post_context: list[str] | None = None
  150. class ValueEventBreadcrumb(LaxIngestSchema):
  151. values: list[EventBreadcrumb]
  152. class ClientSDKPackage(LaxIngestSchema):
  153. name: str | None = None
  154. version: str | None = None
  155. class ClientSDKInfo(LaxIngestSchema):
  156. integrations: list[str | None] | None = None
  157. name: str | None
  158. packages: list[ClientSDKPackage] | None = None
  159. version: str | None
  160. @field_validator("packages", mode="before")
  161. def name_must_contain_space(cls, v: Any) -> Any:
  162. return coerce_list(v)
  163. class RequestHeaders(LaxIngestSchema):
  164. content_type: str | None
  165. class RequestEnv(LaxIngestSchema):
  166. remote_addr: str | None
  167. QueryString = Union[str, ListKeyValue, dict[str, str | None]]
  168. """Raw URL querystring, list, or dict"""
  169. KeyValueFormat = Union[list[list[str | None]], dict[str, CoercedStr | None]]
  170. """
  171. key-values in list or dict format. Example {browser: firefox} or [[browser, firefox]]
  172. """
  173. class IngestRequest(BaseRequest):
  174. headers: KeyValueFormat | None = None
  175. query_string: QueryString | None = None
  176. @field_validator("headers", mode="before")
  177. @classmethod
  178. def fix_non_standard_headers(cls, v):
  179. """
  180. Fix non-documented format used by PHP Sentry Client
  181. Convert {"Foo": ["bar"]} into {"Foo: "bar"}
  182. """
  183. if isinstance(v, dict):
  184. return {
  185. key: value[0] if isinstance(value, list) else value
  186. for key, value in v.items()
  187. }
  188. return v
  189. @field_validator("query_string", "headers")
  190. @classmethod
  191. def prefer_list_key_value(
  192. cls, v: Union[QueryString, KeyValueFormat] | None
  193. ) -> ListKeyValue | None:
  194. """Store all querystring, header formats in a list format"""
  195. result: ListKeyValue | None = None
  196. if isinstance(v, str) and v: # It must be a raw querystring, parse it
  197. qs = parse_qs(v)
  198. result = [[key, value] for key, values in qs.items() for value in values]
  199. elif isinstance(v, dict): # Convert dict to list
  200. result = [[key, value] for key, value in v.items()]
  201. elif isinstance(v, list): # Normalize list (throw out any weird data)
  202. result = [item[:2] for item in v if len(item) >= 2]
  203. if result:
  204. # Remove empty and any key called "Cookie" which could be sensitive data
  205. entry_to_remove = ["Cookie", ""]
  206. return sorted(
  207. [entry for entry in result if entry != entry_to_remove],
  208. key=lambda x: (x[0], x[1]),
  209. )
  210. return result
  211. class IngestIssueEvent(BaseIssueEvent):
  212. timestamp: datetime = Field(default_factory=now)
  213. level: str | None = "error"
  214. logentry: EventMessage | None = None
  215. logger: str | None = None
  216. transaction: str | None = Field(
  217. validation_alias=AliasChoices("transaction", "culprit"), default=None
  218. )
  219. server_name: str | None = None
  220. release: str | None = None
  221. dist: str | None = None
  222. tags: KeyValueFormat | None = None
  223. environment: str | None = None
  224. modules: dict[str, str | None] | None = None
  225. extra: dict[str, Any] | None = None
  226. fingerprint: list[str] | None = None
  227. errors: list[Any] | None = None
  228. exception: Union[list[EventException], ValueEventException] | None = None
  229. message: Union[str, EventMessage] | None = None
  230. template: EventTemplate | None = None
  231. breadcrumbs: Union[list[EventBreadcrumb], ValueEventBreadcrumb] | None = None
  232. sdk: ClientSDKInfo | None = None
  233. request: IngestRequest | None = None
  234. contexts: ContextsSchema | None = None
  235. user: EventUser | None = None
  236. @field_validator("tags")
  237. @classmethod
  238. def prefer_dict(cls, v: KeyValueFormat | None) -> dict[str, str | None] | None:
  239. if isinstance(v, list):
  240. return {key: value for key, value in v if key is not None}
  241. return v
  242. class EventIngestSchema(IngestIssueEvent):
  243. event_id: uuid.UUID
  244. class TransactionEventSchema(LaxIngestSchema):
  245. type: Literal["transaction"]
  246. contexts: JsonValue
  247. measurements: JsonValue | None = None
  248. start_timestamp: datetime
  249. timestamp: datetime
  250. transaction: str
  251. # # SentrySDKEventSerializer
  252. breadcrumbs: JsonValue | None = None
  253. fingerprint: list[str] | None = None
  254. tags: KeyValueFormat | None = None
  255. event_id: uuid.UUID = Field(default_factory=uuid.uuid4)
  256. extra: JsonValue | None
  257. request: IngestRequest | None = None
  258. server_name: str | None
  259. sdk: ClientSDKInfo | None = None
  260. platform: str | None
  261. release: str | None = None
  262. environment: str | None = None
  263. _meta: JsonValue | None
  264. class EnvelopeHeaderSchema(LaxIngestSchema):
  265. event_id: uuid.UUID | None = None
  266. dsn: str | None = None
  267. sdk: ClientSDKInfo | None = None
  268. sent_at: datetime = Field(default_factory=now)
  269. SupportedItemType = Literal["transaction", "event"]
  270. IgnoredItemType = Literal[
  271. "session", "sessions", "client_report", "attachment", "user_report", "check_in"
  272. ]
  273. SUPPORTED_ITEMS = typing.get_args(SupportedItemType)
  274. class ItemHeaderSchema(LaxIngestSchema):
  275. content_type: str | None = None
  276. type: Union[SupportedItemType, IgnoredItemType]
  277. length: int | None = None
  278. class EnvelopeSchema(RootModel[list[dict[str, Any]]]):
  279. root: list[dict[str, Any]]
  280. _header: EnvelopeHeaderSchema
  281. _items: list[
  282. tuple[ItemHeaderSchema, IngestIssueEvent | TransactionEventSchema]
  283. ] = []
  284. @model_validator(mode="after")
  285. def validate_envelope(self) -> "EnvelopeSchema":
  286. data = self.root
  287. try:
  288. header = data.pop(0)
  289. except IndexError:
  290. raise ValidationError([{"message": "Envelope is empty"}])
  291. self._header = EnvelopeHeaderSchema(**header)
  292. while len(data) >= 2:
  293. item_header_data = data.pop(0)
  294. if item_header_data.get("type", None) not in SUPPORTED_ITEMS:
  295. continue
  296. item_header = ItemHeaderSchema(**item_header_data)
  297. if item_header.type == "event":
  298. try:
  299. item = IngestIssueEvent(**data.pop(0))
  300. except ValidationError as err:
  301. logger.warning("Envelope Event item invalid", exc_info=True)
  302. raise err
  303. self._items.append((item_header, item))
  304. elif item_header.type == "transaction":
  305. item = TransactionEventSchema(**data.pop(0))
  306. self._items.append((item_header, item))
  307. return self
  308. class CSPReportSchema(LaxIngestSchema):
  309. """
  310. https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Security-Policy-Report-Only#violation_report_syntax
  311. """
  312. blocked_uri: str = Field(alias="blocked-uri")
  313. disposition: Literal["enforce", "report"] = Field(alias="disposition")
  314. document_uri: str = Field(alias="document-uri")
  315. effective_directive: str = Field(alias="effective-directive")
  316. original_policy: str | None = Field(alias="original-policy")
  317. script_sample: str | None = Field(alias="script-sample", default=None)
  318. status_code: int | None = Field(alias="status-code")
  319. line_number: int | None = None
  320. column_number: int | None = None
  321. class SecuritySchema(LaxIngestSchema):
  322. csp_report: CSPReportSchema = Field(alias="csp-report")
  323. ## Normalized Interchange Issue Events
  324. class IssueEventSchema(IngestIssueEvent):
  325. """
  326. Event storage and interchange format
  327. Used in json view and celery interchange
  328. Don't use this for api intake
  329. """
  330. type: Literal[IssueEventType.DEFAULT] = IssueEventType.DEFAULT
  331. class ErrorIssueEventSchema(IngestIssueEvent):
  332. type: Literal[IssueEventType.ERROR] = IssueEventType.ERROR
  333. class CSPIssueEventSchema(IngestIssueEvent):
  334. type: Literal[IssueEventType.CSP] = IssueEventType.CSP
  335. csp: CSPReportSchema
  336. class InterchangeEvent(LaxIngestSchema):
  337. """Normalized wrapper around issue event. Event should not contain repeat information."""
  338. event_id: uuid.UUID = Field(default_factory=uuid.uuid4)
  339. project_id: int
  340. organization_id: int
  341. received: datetime = Field(default_factory=now)
  342. payload: (
  343. IssueEventSchema
  344. | ErrorIssueEventSchema
  345. | CSPIssueEventSchema
  346. | TransactionEventSchema
  347. ) = Field(discriminator="type")
  348. class InterchangeIssueEvent(InterchangeEvent):
  349. payload: (
  350. IssueEventSchema
  351. | ErrorIssueEventSchema
  352. | CSPIssueEventSchema
  353. | TransactionEventSchema
  354. ) = Field(discriminator="type")
  355. class InterchangeTransactionEvent(InterchangeEvent):
  356. payload: TransactionEventSchema