# make_sample_issues.py
  1. import random
  2. from datetime import timedelta
  3. from django.contrib.postgres.search import SearchVector
  4. from django.db.models import Value
  5. from django.utils import timezone
  6. from apps.issue_events.models import (
  7. Issue,
  8. IssueEvent,
  9. IssueEventType,
  10. IssueTag,
  11. TagKey,
  12. TagValue,
  13. )
  14. from glitchtip.base_commands import MakeSampleCommand
  15. from glitchtip.utils import get_random_string
  16. from .issue_generator import CULPRITS, EXCEPTIONS, SDKS, TITLE_CHOICES, generate_tags
  17. class Command(MakeSampleCommand):
  18. help = "Create sample issues and events for dev and demonstration purposes"
  19. events_quantity_per: int
  20. def add_arguments(self, parser):
  21. self.add_org_project_arguments(parser)
  22. parser.add_argument("--issue-quantity", type=int, default=100)
  23. parser.add_argument(
  24. "--events-quantity-per",
  25. type=int,
  26. help="Defaults to a random amount from 1-100",
  27. )
  28. parser.add_argument(
  29. "--tag-keys-per-event", type=int, default=0, help="Extra random tag keys"
  30. )
  31. parser.add_argument(
  32. "--tag-values-per-key", type=int, default=1, help="Extra random tag values"
  33. )
  34. parser.add_argument(
  35. "--over-days",
  36. type=int,
  37. default=1,
  38. help="Make events received datetime show up over x days",
  39. )
  40. def get_events_count(self) -> int:
  41. if count := self.events_quantity_per:
  42. return count
  43. return random.randint(1, 100)
  44. def create_events_and_issues(
  45. self, issues: list[Issue], issue_events: list[list[IssueEvent]]
  46. ):
  47. issues = Issue.objects.bulk_create(issues)
  48. # Assign issue to each event
  49. for i, issue in enumerate(issues):
  50. events = issue_events[i]
  51. for event in events:
  52. event.issue = issue
  53. flat_events = [x for xs in issue_events for x in xs]
  54. IssueEvent.objects.bulk_create(flat_events)
  55. keys = {
  56. key for issue_event in issue_events for key in issue_event[0].tags.keys()
  57. }
  58. values = {
  59. value
  60. for issue_event in issue_events
  61. for value in issue_event[0].tags.values()
  62. }
  63. TagKey.objects.bulk_create(
  64. [TagKey(key=key) for key in keys], ignore_conflicts=True
  65. )
  66. TagValue.objects.bulk_create(
  67. [TagValue(value=value) for value in values], ignore_conflicts=True
  68. )
  69. tag_keys = {
  70. tag["key"]: tag["id"]
  71. for tag in TagKey.objects.filter(key__in=keys).values()
  72. }
  73. tag_values = {
  74. tag["value"]: tag["id"]
  75. for tag in TagValue.objects.filter(value__in=values).values()
  76. }
  77. issue_tags = []
  78. for i, issue in enumerate(issues):
  79. events = issue_events[i]
  80. tags = events[0].tags
  81. for tag_key, tag_value in tags.items():
  82. tag_key_id = tag_keys[tag_key]
  83. tag_value_id = tag_values[tag_value]
  84. tag_count = max(int(issue.count / 10), 1)
  85. # Create a few groups of IssueTags over time
  86. for _ in range(tag_count):
  87. # Rather than group to nearest minute, just make it random
  88. # To avoid conflicts. Good enough for performance testing.
  89. tag_date = issue.last_seen - timedelta(
  90. minutes=random.randint(0, 60),
  91. seconds=random.randint(0, 60),
  92. milliseconds=random.randint(0, 1000),
  93. microseconds=random.randint(0, 1000),
  94. )
  95. issue_tags.append(
  96. IssueTag(
  97. issue=issue,
  98. date=tag_date,
  99. tag_key_id=tag_key_id,
  100. tag_value_id=tag_value_id,
  101. count=tag_count,
  102. )
  103. )
  104. IssueTag.objects.bulk_create(issue_tags)
  105. self.progress_tick()
  106. def handle(self, *args, **options):
  107. super().handle(*args, **options)
  108. issue_quantity = options["issue_quantity"]
  109. over_days = options["over_days"]
  110. self.events_quantity_per = options["events_quantity_per"]
  111. now = timezone.now()
  112. start_time = now - timedelta(days=over_days)
  113. # timedelta between each new issue first_seen
  114. issue_delta = timedelta(seconds=over_days * 86400 / issue_quantity)
  115. # timedelta between each event for an issue
  116. if self.events_quantity_per:
  117. event_delta = issue_delta / self.events_quantity_per
  118. else:
  119. event_delta = issue_delta / 100
  120. # 10,000 per query is a good target
  121. average_events_per_issue = (
  122. self.events_quantity_per if self.events_quantity_per else 50
  123. )
  124. # Don't go lower than 1. >10,000 events per issue will perform worse
  125. issue_batch_size = max(10000 // average_events_per_issue, 1)
  126. random_tags = {
  127. get_random_string(): [
  128. get_random_string() for _ in range(options["tag_values_per_key"])
  129. ]
  130. for _ in range(options["tag_keys_per_event"])
  131. }
  132. issues: list[Issue] = []
  133. issue_events: list[list[IssueEvent]] = []
  134. for _ in range(issue_quantity):
  135. title = random.choice(TITLE_CHOICES) + " " + get_random_string()
  136. level = IssueEventType.ERROR
  137. culprit = random.choice(CULPRITS)
  138. event_count = self.get_events_count()
  139. # Include both realistic looking and random tags
  140. tags = generate_tags() | {
  141. tag: random.choice(value) for tag, value in random_tags.items()
  142. }
  143. first_seen = start_time
  144. last_seen = first_seen + event_delta * event_count
  145. start_time += issue_delta
  146. events: list[IssueEvent] = []
  147. timestamp = first_seen
  148. for _ in range(event_count):
  149. timestamp += event_delta
  150. received = timestamp + timezone.timedelta(milliseconds=1)
  151. events.append(
  152. IssueEvent(
  153. level=level,
  154. data={
  155. "title": title,
  156. "sdk": random.choice(SDKS),
  157. "culprit": culprit,
  158. "exception": random.choice(EXCEPTIONS),
  159. },
  160. timestamp=timestamp,
  161. received=received,
  162. tags=tags,
  163. )
  164. )
  165. issues.append(
  166. Issue(
  167. title=title,
  168. culprit=culprit,
  169. level=level,
  170. metadata={"title": title},
  171. first_seen=first_seen,
  172. last_seen=last_seen,
  173. project=self.project,
  174. search_vector=SearchVector(Value(title)),
  175. count=event_count,
  176. ),
  177. )
  178. issue_events.append(events)
  179. if len(issues) > issue_batch_size:
  180. self.create_events_and_issues(issues, issue_events)
  181. issues = []
  182. issue_events = []
  183. if issues:
  184. self.create_events_and_issues(issues, issue_events)
  185. self.success_message('Successfully created "%s" issues' % issue_quantity)