schema.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
  1. import logging
  2. import typing
  3. import uuid
  4. from datetime import datetime
  5. from typing import Annotated, Any, Literal, Optional, Union
  6. from urllib.parse import parse_qs
  7. from django.utils.timezone import now
  8. from ninja import Field
  9. from ninja import Schema as BaseSchema
  10. from pydantic import (
  11. AliasChoices,
  12. RootModel,
  13. ValidationError,
  14. WrapValidator,
  15. field_validator,
  16. model_validator,
  17. )
  18. from apps.issue_events.constants import IssueEventType
  19. from ..common_event_schema import (
  20. BaseIssueEvent,
  21. BaseRequest,
  22. EventBreadcrumb,
  23. ListKeyValue,
  24. )
  25. from ..common_event_utils import invalid_to_none
  # Module-level logger, named after this module's import path.
  logger = logging.getLogger(__name__)
  class Schema(BaseSchema):
      """Schema configuration for all event ingest schemas"""

      class Config(BaseSchema.Config):
          # Lax is best for ingest: allow numbers to be coerced to strings.
          coerce_numbers_to_str = True
  class TagKeyValue(Schema):
      # One tag expressed as an explicit key/value pair; both parts are
      # required strings.
      key: str
      value: str
  class Signal(Schema):
      # `number` is the only non-nullable field. The other fields accept
      # null but are declared without a default value.
      number: int
      code: Optional[int]
      name: Optional[str]
      code_name: Optional[str]
  class MachException(Schema):
      # Three required integers plus a nullable `name` that is declared
      # without a default value.
      number: int
      code: int
      subcode: int
      name: Optional[str]
  class NSError(Schema):
      # Both fields are required.
      code: int
      domain: str
  class Errno(Schema):
      # `number` is required; `name` accepts null but is declared without a
      # default value.
      number: int
      name: Optional[str]
  class MechanismMeta(Schema):
      # Optional detail objects; each one is validated by its own schema and
      # defaults to None when absent from the input.
      signal: Optional[Signal] = None
      # The attribute keeps its historical (misspelled) name so existing
      # callers are unaffected, but the key documented by Sentry is
      # "mach_exception". Accept both spellings on input, preferring the
      # documented one.
      match_exception: Optional[MachException] = Field(
          default=None,
          validation_alias=AliasChoices("mach_exception", "match_exception"),
      )
      ns_error: Optional[NSError] = None
      errno: Optional[Errno] = None
  class ExceptionMechanism(Schema):
      # Only `type` is required; every other field defaults to None.
      type: str
      description: Optional[str] = None
      help_link: Optional[str] = None
      handled: Optional[bool] = None
      synthetic: Optional[bool] = None
      # `meta` and `data` are accepted as free-form dicts; their contents
      # are not validated further here.
      meta: Optional[dict] = None
      data: Optional[dict] = None
  class StackTraceFrame(Schema):
      # Every field is optional and defaults to None.

      # Location of the frame
      filename: Optional[str] = None
      function: Optional[str] = None
      raw_function: Optional[str] = None
      module: Optional[str] = None
      lineno: Optional[int] = None
      colno: Optional[int] = None
      abs_path: Optional[str] = None

      # Source context around the frame
      context_line: Optional[str] = None
      pre_context: Optional[list[str]] = None
      post_context: Optional[list[str]] = None
      source_link: Optional[str] = None

      in_app: Optional[bool] = None
      stack_start: Optional[bool] = None
      # Values in `vars` may be a string, a dict or a list.
      vars: Optional[dict[str, Union[str, dict, list]]] = None

      # Address-related fields are kept as strings.
      instruction_addr: Optional[str] = None
      addr_mode: Optional[str] = None
      symbol_addr: Optional[str] = None
      image_addr: Optional[str] = None

      package: Optional[str] = None
      platform: Optional[str] = None
  class StackTrace(Schema):
      # `frames` is required; `registers` is an optional str-to-str mapping.
      frames: list[StackTraceFrame]
      registers: Optional[dict[str, str]] = None
  class EventException(Schema):
      type: str
      # Validation of `value` is wrapped by `invalid_to_none` (imported from
      # common_event_utils). Judging by its name it presumably swaps values
      # that fail validation for None; confirm against that helper. The
      # field is declared without a default value.
      value: Annotated[Optional[str], WrapValidator(invalid_to_none)]
      module: Optional[str] = None
      thread_id: Optional[str] = None
      mechanism: Optional[ExceptionMechanism] = None
      stacktrace: Optional[StackTrace] = None
  class ValueEventException(Schema):
      # Wrapper form: the exceptions are nested under a "values" key.
      values: list[EventException]
  class EventMessage(Schema):
      # `formatted` is the final display string; `message` and `params` are
      # the raw template and its interpolation arguments.
      formatted: str = Field(max_length=8192, default="")
      message: Optional[str] = None
      params: Optional[Union[list[str], dict[str, str]]] = None

      @model_validator(mode="after")
      def set_formatted(self) -> "EventMessage":
          """
          When the EventMessage formatted string is not set,
          attempt to set it based on message and params interpolation

          A list of params is applied with %-formatting, a dict of params
          with str.format(). If the client-supplied template and params do
          not line up, the interpolation is skipped and formatted is left
          unchanged rather than failing the whole event.
          """
          if self.formatted or not self.message:
              return self

          params = self.params
          try:
              if isinstance(params, list):
                  self.formatted = self.message % tuple(params)
              elif isinstance(params, dict):
                  self.formatted = self.message.format(**params)
          except (TypeError, ValueError, KeyError, IndexError):
              # TypeError/KeyError/IndexError raised inside a validator are
              # not turned into a validation error by pydantic and would
              # propagate as an unhandled exception. Ingest is deliberately
              # lax, so keep the event and leave formatted as it was.
              logger.debug("Unable to interpolate event message params", exc_info=True)
          return self
  class EventTemplate(Schema):
      # `lineno`, `filename` and `context_line` are required; the remaining
      # fields default to None.
      lineno: int
      abs_path: Optional[str] = None
      filename: str
      context_line: str
      pre_context: Optional[list[str]] = None
      post_context: Optional[list[str]] = None
  class ValueEventBreadcrumb(Schema):
      # Wrapper form: the breadcrumbs are nested under a "values" key.
      values: list[EventBreadcrumb]
  class ClientSDKPackage(Schema):
      # Both fields are optional and default to None.
      name: Optional[str] = None
      version: Optional[str] = None
  class ClientSDKInfo(Schema):
      # Individual integration entries may themselves be null.
      integrations: Optional[list[Optional[str]]] = None
      # `name` and `version` accept null but are declared without a default
      # value.
      name: Optional[str]
      packages: Optional[list[ClientSDKPackage]] = None
      version: Optional[str]
  class RequestHeaders(Schema):
      # Nullable, declared without a default value.
      content_type: Optional[str]
  class RequestEnv(Schema):
      # Nullable, declared without a default value.
      remote_addr: Optional[str]
  # Raw URL querystring, list, or dict
  QueryString = Union[str, ListKeyValue, dict[str, Optional[str]]]

  # Header in list or dict format, expected to normalize to list
  Headers = Union[list[list[Optional[str]]], dict[str, Optional[str]]]
  class IngestRequest(BaseRequest):
      # Both fields accept several input shapes and are normalized to a
      # sorted list of [key, value] pairs by `prefer_list_key_value`.
      headers: Optional[Headers] = None
      query_string: Optional[QueryString] = None

      @field_validator("headers", mode="before")
      @classmethod
      def fix_non_standard_headers(cls, v):
          """
          Fix non-documented format used by PHP Sentry Client
          Convert {"Foo": ["bar"]} into {"Foo": "bar"}

          Only the first element of a list value is kept. An empty list
          becomes None. Non-dict input is returned untouched.
          """
          if not isinstance(v, dict):
              return v
          fixed = {}
          for key, value in v.items():
              if isinstance(value, list):
                  # An empty list has no first element; map it to None
                  # instead of raising IndexError during validation.
                  value = value[0] if value else None
              fixed[key] = value
          return fixed

      @field_validator("query_string", "headers")
      @classmethod
      def prefer_list_key_value(
          cls, v: Optional[Union[QueryString, Headers]]
      ) -> Optional[ListKeyValue]:
          """
          Store all querystring, header formats in a list format

          Returns a list of [key, value] pairs sorted by key then value, or
          the unsorted (empty or None) result when nothing was collected.
          """
          result: Optional[ListKeyValue] = None
          if isinstance(v, str) and v:  # It must be a raw querystring, parse it
              qs = parse_qs(v)
              result = [[key, value] for key, values in qs.items() for value in values]
          elif isinstance(v, dict):  # Convert dict to list
              result = [[key, value] for key, value in v.items()]
          elif isinstance(v, list):  # Normalize list (throw out any weird data)
              result = [item[:2] for item in v if len(item) >= 2]

          if result:
              # Drops entries that are exactly ["Cookie", ""], i.e. a Cookie
              # key with an empty value. Cookie entries with a non-empty
              # value are kept.
              # NOTE(review): the previous comment read as if every Cookie
              # entry should be removed as sensitive data - confirm which
              # behavior is intended.
              entry_to_remove = ["Cookie", ""]
              return sorted(
                  [entry for entry in result if entry != entry_to_remove],
                  # Keys and values may be None; order None like "" so the
                  # comparison never mixes None with str (TypeError).
                  key=lambda x: (x[0] or "", x[1] or ""),
              )
          return result
  class IngestIssueEvent(BaseIssueEvent):
      # Defaults to the current time (django.utils.timezone.now) when the
      # input carries no timestamp.
      timestamp: datetime = Field(default_factory=now)
      level: Optional[str] = "error"
      logentry: Optional[EventMessage] = None
      logger: Optional[str] = None
      # Read from either "transaction" or "culprit" in the input.
      transaction: Optional[str] = Field(
          validation_alias=AliasChoices("transaction", "culprit"), default=None
      )
      server_name: Optional[str] = None
      release: Optional[str] = None
      dist: Optional[str] = None
      # Tags are accepted as a plain dict or as a list of key/value objects.
      tags: Optional[Union[dict[str, str], list[TagKeyValue]]] = None
      environment: Optional[str] = None
      modules: Optional[dict[str, Optional[str]]] = None
      extra: Optional[Any] = None
      fingerprint: Optional[list[str]] = None
      errors: Optional[list[Any]] = None
      # Exceptions and breadcrumbs are accepted either as a bare list or
      # wrapped in an object with a "values" key.
      exception: Optional[Union[list[EventException], ValueEventException]] = None
      message: Optional[Union[str, EventMessage]] = None
      template: Optional[EventTemplate] = None
      breadcrumbs: Optional[Union[list[EventBreadcrumb], ValueEventBreadcrumb]] = None
      sdk: Optional[ClientSDKInfo] = None
      request: Optional[IngestRequest] = None
  class EventIngestSchema(IngestIssueEvent):
      # Same fields as IngestIssueEvent plus a required event id.
      event_id: uuid.UUID
  class EnvelopeHeaderSchema(Schema):
      event_id: uuid.UUID
      dsn: Optional[str] = None
      sdk: Optional[ClientSDKInfo] = None
      # Defaults to the current time (django.utils.timezone.now) when the
      # header carries no sent_at.
      sent_at: datetime = Field(default_factory=now)
  # Envelope item types this module knows about.
  SupportedItemType = Literal["transaction", "event"]
  # The same values as a runtime tuple, derived from the Literal above.
  SUPPORTED_ITEMS = typing.get_args(SupportedItemType)
  class ItemHeaderSchema(Schema):
      # Only `type` is mandatory in an envelope item header. Without an
      # explicit default a nullable field is still required under
      # pydantic v2, which would reject headers such as {"type": "event"},
      # so `content_type` and `length` default to None.
      content_type: Optional[str] = None
      type: SupportedItemType
      length: Optional[int] = None
  class EnvelopeSchema(RootModel[list[dict[str, Any]]]):
      # The root is the already-decoded envelope: the envelope header
      # followed by alternating item header / item payload dicts.
      root: list[dict[str, Any]]
      # Populated by `validate_envelope`.
      _header: EnvelopeHeaderSchema
      _items: list[tuple[ItemHeaderSchema, IngestIssueEvent]] = []

      @model_validator(mode="after")
      def validate_envelope(self) -> "EnvelopeSchema":
          """
          Split the envelope into its header and its supported items.

          The first element is validated as the envelope header. The rest
          is consumed as (item header, item payload) pairs. Pairs whose
          type is not in SUPPORTED_ITEMS are skipped; "event" payloads are
          validated and stored in `_items`. Consumed elements are removed
          from `root`.

          Raises:
              ValueError: if the envelope is empty (pydantic reports it as
                  a validation error).
              ValidationError: if the header, an item header or an event
                  payload is invalid.
          """
          data = self.root
          if not data:
              # pydantic's ValidationError cannot be constructed directly;
              # raising ValueError inside a validator is the supported way
              # to report a validation failure.
              raise ValueError("Envelope is empty")
          self._header = EnvelopeHeaderSchema(**data.pop(0))

          while len(data) >= 2:
              item_header_data = data.pop(0)
              # Always consume the payload together with its header so that
              # a skipped or unhandled item's payload is never mistaken for
              # the next item header.
              item_payload = data.pop(0)
              if item_header_data.get("type", None) not in SUPPORTED_ITEMS:
                  continue
              item_header = ItemHeaderSchema(**item_header_data)
              if item_header.type == "event":
                  try:
                      item = IngestIssueEvent(**item_payload)
                  except ValidationError:
                      logger.warning("Envelope Event item invalid", exc_info=True)
                      raise
                  self._items.append((item_header, item))
          return self
  class CSPReportSchema(Schema):
      """
      https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Security-Policy-Report-Only#violation_report_syntax
      """

      # Hyphenated report keys are mapped onto snake_case attributes through
      # aliases.
      blocked_uri: str = Field(alias="blocked-uri")
      disposition: Literal["enforce", "report"] = Field(alias="disposition")
      document_uri: str = Field(alias="document-uri")
      effective_directive: str = Field(alias="effective-directive")
      # Nullable, but declared without a default value.
      original_policy: Optional[str] = Field(alias="original-policy")
      script_sample: Optional[str] = Field(alias="script-sample", default=None)
      # Nullable, but declared without a default value.
      status_code: Optional[int] = Field(alias="status-code")
      line_number: Optional[int] = None
      column_number: Optional[int] = None
  class SecuritySchema(Schema):
      # The report body is nested under the "csp-report" key.
      csp_report: CSPReportSchema = Field(alias="csp-report")
  252. ## Normalized Interchange Issue Events
  class IssueEventSchema(IngestIssueEvent):
      """
      Event storage and interchange format
      Used in json view and celery interchange
      Don't use this for api intake
      """

      # Fixed tag value that identifies this payload variant.
      type: Literal[IssueEventType.DEFAULT] = IssueEventType.DEFAULT
  class ErrorIssueEventSchema(IngestIssueEvent):
      # Fixed tag value that identifies this payload variant.
      type: Literal[IssueEventType.ERROR] = IssueEventType.ERROR
  class CSPIssueEventSchema(IngestIssueEvent):
      # Fixed tag value that identifies this payload variant.
      type: Literal[IssueEventType.CSP] = IssueEventType.CSP
      # The CSP report is required for this variant.
      csp: CSPReportSchema
  class InterchangeIssueEvent(Schema):
      """Normalized wrapper around issue event. Event should not contain repeat information."""

      # A fresh random UUID is generated when no event id is supplied.
      event_id: uuid.UUID = Field(default_factory=uuid.uuid4)
      project_id: int
      # Defaults to the current time (django.utils.timezone.now).
      received: datetime = Field(default_factory=now)
      # The payload's "type" value selects which of the three schemas is
      # used to validate it.
      payload: Union[
          IssueEventSchema, ErrorIssueEventSchema, CSPIssueEventSchema
      ] = Field(discriminator="type")