schema.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484
  1. import logging
  2. import typing
  3. import uuid
  4. from datetime import datetime
  5. from typing import Annotated, Any, Literal, Union
  6. from urllib.parse import parse_qs, urlparse
  7. from django.utils.timezone import now
  8. from ninja import Field
  9. from pydantic import (
  10. AliasChoices,
  11. BaseModel,
  12. BeforeValidator,
  13. JsonValue,
  14. RootModel,
  15. ValidationError,
  16. WrapValidator,
  17. field_validator,
  18. model_validator,
  19. )
  20. from apps.issue_events.constants import IssueEventType
  21. from ..shared.schema.base import LaxIngestSchema
  22. from ..shared.schema.contexts import Contexts
  23. from ..shared.schema.event import (
  24. BaseIssueEvent,
  25. BaseRequest,
  26. EventBreadcrumb,
  27. ListKeyValue,
  28. )
  29. from ..shared.schema.user import EventUser
  30. from ..shared.schema.utils import invalid_to_none
  31. logger = logging.getLogger(__name__)
  32. CoercedStr = Annotated[
  33. str, BeforeValidator(lambda v: str(v) if isinstance(v, (bool, list)) else v)
  34. ]
  35. """
  36. Coerced Str that will coerce bool/list to str when found
  37. """
  38. def coerce_list(v: Any) -> Any:
  39. """Wrap non-list dict into list: {"a": 1} to [{"a": 1}]"""
  40. return v if not isinstance(v, dict) else [v]
  41. class Signal(LaxIngestSchema):
  42. number: int
  43. code: int | None
  44. name: str | None
  45. code_name: str | None
  46. class MachException(LaxIngestSchema):
  47. number: int
  48. code: int
  49. subcode: int
  50. name: str | None
  51. class NSError(LaxIngestSchema):
  52. code: int
  53. domain: str
  54. class Errno(LaxIngestSchema):
  55. number: int
  56. name: str | None
  57. class MechanismMeta(LaxIngestSchema):
  58. signal: Signal | None = None
  59. match_exception: MachException | None = None
  60. ns_error: NSError | None = None
  61. errno: Errno | None = None
  62. class ExceptionMechanism(LaxIngestSchema):
  63. type: str
  64. description: str | None = None
  65. help_link: str | None = None
  66. handled: bool | None = None
  67. synthetic: bool | None = None
  68. meta: dict | None = None
  69. data: dict | None = None
  70. class StackTraceFrame(LaxIngestSchema):
  71. filename: str | None = None
  72. function: str | None = None
  73. raw_function: str | None = None
  74. module: str | None = None
  75. lineno: int | None = None
  76. colno: int | None = None
  77. abs_path: str | None = None
  78. context_line: str | None = None
  79. pre_context: list[str | None] | None = None
  80. post_context: list[str | None] | None = None
  81. source_link: str | None = None
  82. in_app: bool | None = None
  83. stack_start: bool | None = None
  84. vars: dict[str, Union[str, dict, list]] | None = None
  85. instruction_addr: str | None = None
  86. addr_mode: str | None = None
  87. symbol_addr: str | None = None
  88. image_addr: str | None = None
  89. package: str | None = None
  90. platform: str | None = None
  91. def is_url(self, filename: str) -> bool:
  92. return filename.startswith(("file:", "http:", "https:", "applewebdata:"))
  93. @model_validator(mode="after")
  94. def normalize_files(self):
  95. if not self.abs_path and self.filename:
  96. self.abs_path = self.filename
  97. if self.filename and self.is_url(self.filename):
  98. self.filename = urlparse(self.filename).path
  99. return self
  100. @field_validator("pre_context", "post_context")
  101. @classmethod
  102. def replace_null(cls, context: list[str | None]) -> list[str | None] | None:
  103. if context:
  104. return [line if line else "" for line in context]
  105. return None
  106. class StackTrace(LaxIngestSchema):
  107. frames: list[StackTraceFrame]
  108. registers: dict[str, str] | None = None
  109. class EventException(LaxIngestSchema):
  110. type: str | None = None
  111. value: Annotated[str | None, WrapValidator(invalid_to_none)] = None
  112. module: str | None = None
  113. thread_id: str | None = None
  114. mechanism: Annotated[ExceptionMechanism | None, WrapValidator(invalid_to_none)] = (
  115. None
  116. )
  117. stacktrace: Annotated[StackTrace | None, WrapValidator(invalid_to_none)] = None
  118. @model_validator(mode="after")
  119. def check_type_value(self):
  120. if self.type is None and self.value is None:
  121. return None
  122. return self
  123. class ValueEventException(LaxIngestSchema):
  124. values: list[EventException]
  125. @field_validator("values")
  126. @classmethod
  127. def strip_null(cls, v: list[EventException]) -> list[EventException]:
  128. return [e for e in v if e is not None]
  129. class EventMessage(LaxIngestSchema):
  130. formatted: str = Field(max_length=8192, default="")
  131. message: str | None = None
  132. params: list[CoercedStr] | dict[str, str] | None = None
  133. @model_validator(mode="after")
  134. def set_formatted(self) -> "EventMessage":
  135. """
  136. When the EventMessage formatted string is not set,
  137. attempt to set it based on message and params interpolation
  138. """
  139. if not self.formatted and self.message:
  140. params = self.params
  141. if isinstance(params, list) and params:
  142. try:
  143. formatted_params = tuple(
  144. int(p) if isinstance(p, str) and p.isdigit() else p
  145. for p in params
  146. )
  147. self.formatted = self.message % tuple(formatted_params)
  148. except TypeError:
  149. pass
  150. elif isinstance(params, dict):
  151. self.formatted = self.message.format(**params)
  152. return self
  153. class EventTemplate(LaxIngestSchema):
  154. lineno: int
  155. abs_path: str | None = None
  156. filename: str
  157. context_line: str
  158. pre_context: list[str] | None = None
  159. post_context: list[str] | None = None
  160. # Important, for some reason using Schema will cause the DebugImage union not to work
  161. class SourceMapImage(BaseModel):
  162. type: Literal["sourcemap"]
  163. code_file: str
  164. debug_id: uuid.UUID
  165. # Important, for some reason using Schema will cause the DebugImage union not to work
  166. class OtherDebugImage(BaseModel):
  167. type: str
  168. DebugImage = Annotated[SourceMapImage, Field(discriminator="type")] | OtherDebugImage
  169. class DebugMeta(LaxIngestSchema):
  170. images: list[DebugImage]
  171. class ValueEventBreadcrumb(LaxIngestSchema):
  172. values: list[EventBreadcrumb]
  173. class ClientSDKPackage(LaxIngestSchema):
  174. name: str | None = None
  175. version: str | None = None
  176. class ClientSDKInfo(LaxIngestSchema):
  177. integrations: list[str | None] | None = None
  178. name: str | None
  179. packages: list[ClientSDKPackage] | None = None
  180. version: str | None
  181. @field_validator("packages", mode="before")
  182. def name_must_contain_space(cls, v: Any) -> Any:
  183. return coerce_list(v)
  184. class RequestHeaders(LaxIngestSchema):
  185. content_type: str | None
  186. class RequestEnv(LaxIngestSchema):
  187. remote_addr: str | None
  188. QueryString = str | ListKeyValue | dict[str, str | dict[str, Any] | None]
  189. """Raw URL querystring, list, or dict"""
  190. KeyValueFormat = Union[list[list[str | None]], dict[str, CoercedStr | None]]
  191. """
  192. key-values in list or dict format. Example {browser: firefox} or [[browser, firefox]]
  193. """
  194. class IngestRequest(BaseRequest):
  195. headers: KeyValueFormat | None = None
  196. query_string: QueryString | None = None
  197. @field_validator("headers", mode="before")
  198. @classmethod
  199. def fix_non_standard_headers(cls, v):
  200. """
  201. Fix non-documented format used by PHP Sentry Client
  202. Convert {"Foo": ["bar"]} into {"Foo: "bar"}
  203. """
  204. if isinstance(v, dict):
  205. return {
  206. key: value[0] if isinstance(value, list) else value
  207. for key, value in v.items()
  208. }
  209. return v
  210. @field_validator("query_string", "headers")
  211. @classmethod
  212. def prefer_list_key_value(
  213. cls, v: Union[QueryString, KeyValueFormat] | None
  214. ) -> ListKeyValue | None:
  215. """Store all querystring, header formats in a list format"""
  216. result: ListKeyValue | None = None
  217. if isinstance(v, str) and v: # It must be a raw querystring, parse it
  218. qs = parse_qs(v)
  219. result = [[key, value] for key, values in qs.items() for value in values]
  220. elif isinstance(v, dict): # Convert dict to list
  221. result = [[key, value] for key, value in v.items()]
  222. elif isinstance(v, list): # Normalize list (throw out any weird data)
  223. result = [item[:2] for item in v if len(item) >= 2]
  224. if result:
  225. # Remove empty and any key called "Cookie" which could be sensitive data
  226. entry_to_remove = ["Cookie", ""]
  227. return sorted(
  228. [entry for entry in result if entry != entry_to_remove],
  229. key=lambda x: (x[0], x[1]),
  230. )
  231. return result
  232. class IngestIssueEvent(BaseIssueEvent):
  233. timestamp: datetime = Field(default_factory=now)
  234. level: str | None = "error"
  235. logentry: EventMessage | None = None
  236. logger: str | None = None
  237. transaction: str | None = Field(
  238. validation_alias=AliasChoices("transaction", "culprit"), default=None
  239. )
  240. server_name: str | None = None
  241. release: str | None = None
  242. dist: str | None = None
  243. tags: KeyValueFormat | None = None
  244. environment: str | None = None
  245. modules: dict[str, str | None] | None = None
  246. extra: dict[str, Any] | None = None
  247. fingerprint: list[Union[str, None]] | None = None
  248. errors: list[Any] | None = None
  249. exception: list[EventException] | ValueEventException | None = None
  250. message: Union[str, EventMessage] | None = None
  251. template: EventTemplate | None = None
  252. breadcrumbs: Union[list[EventBreadcrumb], ValueEventBreadcrumb] | None = None
  253. sdk: ClientSDKInfo | None = None
  254. request: IngestRequest | None = None
  255. contexts: Contexts | None = None
  256. user: EventUser | None = None
  257. debug_meta: DebugMeta | None = None
  258. @field_validator("tags")
  259. @classmethod
  260. def prefer_dict(cls, v: KeyValueFormat | None) -> dict[str, str | None] | None:
  261. if isinstance(v, list):
  262. return {key: value for key, value in v if key is not None}
  263. return v
  264. class EventIngestSchema(IngestIssueEvent):
  265. event_id: uuid.UUID
  266. class TransactionEventSchema(LaxIngestSchema):
  267. type: Literal["transaction"] = "transaction"
  268. contexts: JsonValue
  269. measurements: JsonValue | None = None
  270. start_timestamp: datetime
  271. timestamp: datetime
  272. transaction: str
  273. # # SentrySDKEventSerializer
  274. breadcrumbs: JsonValue | None = None
  275. fingerprint: list[str] | None = None
  276. tags: KeyValueFormat | None = None
  277. event_id: uuid.UUID = Field(default_factory=uuid.uuid4)
  278. extra: JsonValue | None = None
  279. request: IngestRequest | None = None
  280. server_name: str | None = None
  281. sdk: ClientSDKInfo | None = None
  282. platform: str | None
  283. release: str | None = None
  284. environment: str | None = None
  285. _meta: JsonValue | None
  286. class EnvelopeHeaderSchema(LaxIngestSchema):
  287. event_id: uuid.UUID | None = None
  288. dsn: str | None = None
  289. sdk: ClientSDKInfo | None = None
  290. sent_at: datetime = Field(default_factory=now)
  291. SupportedItemType = Literal["transaction", "event"]
  292. IgnoredItemType = Literal[
  293. "session", "sessions", "client_report", "attachment", "user_report", "check_in"
  294. ]
  295. SUPPORTED_ITEMS = typing.get_args(SupportedItemType)
  296. class ItemHeaderSchema(LaxIngestSchema):
  297. content_type: str | None = None
  298. type: Union[SupportedItemType, IgnoredItemType]
  299. length: int | None = None
  300. class EnvelopeSchema(RootModel[list[dict[str, Any]]]):
  301. root: list[dict[str, Any]]
  302. _header: EnvelopeHeaderSchema
  303. _items: list[
  304. tuple[ItemHeaderSchema, IngestIssueEvent | TransactionEventSchema]
  305. ] = []
  306. @model_validator(mode="after")
  307. def validate_envelope(self) -> "EnvelopeSchema":
  308. data = self.root
  309. try:
  310. header = data.pop(0)
  311. except IndexError:
  312. raise ValidationError([{"message": "Envelope is empty"}])
  313. self._header = EnvelopeHeaderSchema(**header)
  314. while len(data) >= 2:
  315. item_header_data = data.pop(0)
  316. if item_header_data.get("type", None) not in SUPPORTED_ITEMS:
  317. continue
  318. item_header = ItemHeaderSchema(**item_header_data)
  319. if item_header.type == "event":
  320. try:
  321. item = IngestIssueEvent(**data.pop(0))
  322. except ValidationError as err:
  323. logger.warning("Envelope Event item invalid", exc_info=True)
  324. raise err
  325. self._items.append((item_header, item))
  326. elif item_header.type == "transaction":
  327. item = TransactionEventSchema(**data.pop(0))
  328. self._items.append((item_header, item))
  329. return self
  330. class CSPReportSchema(LaxIngestSchema):
  331. """
  332. https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Security-Policy-Report-Only#violation_report_syntax
  333. """
  334. blocked_uri: str = Field(alias="blocked-uri")
  335. disposition: Literal["enforce", "report"] = Field(alias="disposition")
  336. document_uri: str = Field(alias="document-uri")
  337. effective_directive: str = Field(alias="effective-directive")
  338. original_policy: str | None = Field(alias="original-policy")
  339. script_sample: str | None = Field(alias="script-sample", default=None)
  340. status_code: int | None = Field(alias="status-code")
  341. line_number: int | None = None
  342. column_number: int | None = None
  343. class SecuritySchema(LaxIngestSchema):
  344. csp_report: CSPReportSchema = Field(alias="csp-report")
  345. ## Normalized Interchange Issue Events
  346. class IssueEventSchema(IngestIssueEvent):
  347. """
  348. Event storage and interchange format
  349. Used in json view and celery interchange
  350. Don't use this for api intake
  351. """
  352. type: Literal[IssueEventType.DEFAULT] = IssueEventType.DEFAULT
  353. class ErrorIssueEventSchema(IngestIssueEvent):
  354. type: Literal[IssueEventType.ERROR] = IssueEventType.ERROR
  355. class CSPIssueEventSchema(IngestIssueEvent):
  356. type: Literal[IssueEventType.CSP] = IssueEventType.CSP
  357. csp: CSPReportSchema
  358. class InterchangeEvent(LaxIngestSchema):
  359. """Normalized wrapper around issue event. Event should not contain repeat information."""
  360. event_id: uuid.UUID = Field(default_factory=uuid.uuid4)
  361. project_id: int
  362. organization_id: int
  363. received: datetime = Field(default_factory=now)
  364. payload: (
  365. IssueEventSchema
  366. | ErrorIssueEventSchema
  367. | CSPIssueEventSchema
  368. | TransactionEventSchema
  369. ) = Field(discriminator="type")
  370. class InterchangeIssueEvent(InterchangeEvent):
  371. payload: (
  372. IssueEventSchema
  373. | ErrorIssueEventSchema
  374. | CSPIssueEventSchema
  375. | TransactionEventSchema
  376. ) = Field(discriminator="type")
  377. class InterchangeTransactionEvent(InterchangeEvent):
  378. payload: TransactionEventSchema