split-silo-database 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. #!/usr/bin/env python
  2. import click
  3. from django.apps import apps
  4. from sentry.runner import configure
  5. from sentry.runner.commands.devservices import get_docker_client
  6. from sentry.silo.base import SiloMode
# NOTE(review): this runs at import time, ahead of the django.conf settings and
# sentry.models imports that follow it -- presumably those imports need an
# already-configured Sentry/Django environment, so keep this ordering (confirm).
configure()
  8. from django.conf import settings
  9. from sentry.models.organizationmapping import OrganizationMapping
  10. def exec_run(container, command):
  11. wrapped_command = f'sh -c "{" ".join(command)}"'
  12. exit_code, output = container.exec_run(cmd=wrapped_command, stdout=True, stderr=True)
  13. if exit_code:
  14. click.echo("Container operation Failed!")
  15. click.echo(f"Container operation failed with {output}")
  16. return output
  17. def split_database(tables: list[str], source: str, destination: str, reset: bool, verbose: bool):
  18. click.echo(f">> Dumping tables from {source} database")
  19. command = ["pg_dump", "-U", "postgres", "-d", source, "--clean"]
  20. for table in tables:
  21. command.extend(["-t", table])
  22. command.extend([">", f"/tmp/{destination}-tables.sql"])
  23. with get_docker_client() as client:
  24. postgres = client.containers.get("sentry_postgres")
  25. if verbose:
  26. click.echo(f">> Running {' '.join(command)}")
  27. exec_run(postgres, command)
  28. if reset:
  29. click.echo(f">> Dropping existing {destination} database")
  30. exec_run(postgres, ["dropdb", "-U", "postgres", "--if-exists", destination])
  31. exec_run(postgres, ["createdb", "-U", "postgres", destination])
  32. citext_command = [
  33. "psql",
  34. "-U",
  35. "postgres",
  36. destination,
  37. "-c",
  38. "'CREATE EXTENSION IF NOT EXISTS citext'",
  39. ]
  40. if verbose:
  41. click.echo(f">> RUNNING: {' '.join(citext_command)}")
  42. exec_run(postgres, citext_command)
  43. # Use the dump file to build control silo tables.
  44. click.echo(f">> Building {destination} database from dump file")
  45. import_command = [
  46. "psql",
  47. "-U",
  48. "postgres",
  49. destination,
  50. "<",
  51. f"/tmp/{destination}-tables.sql",
  52. ]
  53. if verbose:
  54. click.echo(f">> Running {' '.join(import_command)}")
  55. exec_run(postgres, import_command)
  56. if destination == "region" and reset:
  57. click.echo(">> Cloning stored procedures")
  58. function_dump = [
  59. "psql",
  60. "-U",
  61. "postgres",
  62. source,
  63. "-c",
  64. "'\\sf sentry_increment_project_counter'",
  65. ]
  66. function_sql = exec_run(postgres, function_dump)
  67. import_function = [
  68. "psql",
  69. "-U",
  70. "postgres",
  71. destination,
  72. "-c",
  73. "'" + function_sql.decode("utf8") + "'",
  74. ]
  75. exec_run(postgres, import_function)
  76. def revise_organization_mappings(legacy_region_name: str):
  77. if settings.SENTRY_MONOLITH_REGION == legacy_region_name:
  78. click.echo(
  79. "> No OrganizationMapping have been modified. Set 'SENTRY_MONOLITH_REGION' in sentry.conf.py to update monolith mappings."
  80. )
  81. else:
  82. qs = OrganizationMapping.objects.filter(region_name=legacy_region_name)
  83. record_count = len(qs)
  84. qs.update(region_name=settings.SENTRY_MONOLITH_REGION)
  85. click.echo(
  86. f"> {record_count} OrganizationMapping record(s) have been updated from '{legacy_region_name}' to '{settings.SENTRY_MONOLITH_REGION}'"
  87. )
  88. @click.command()
  89. @click.option(
  90. "--legacy-region-name",
  91. default="--monolith--",
  92. help="Previous value of settings.SENTRY_MONOLITH_REGION to overwrite in organization mappings",
  93. )
  94. @click.option("--verbose", default=False, is_flag=True, help="Enable verbose logging")
  95. @click.option(
  96. "--reset",
  97. default=False,
  98. is_flag=True,
  99. help="Reset the target databases to be empty before loading extracted data and schema.",
  100. )
  101. @click.option("--database", default="sentry", help="Which database to derive splits from")
  102. def main(database: str, reset: bool, verbose: bool, legacy_region_name: str):
  103. """
  104. This is a development tool that can convert a monolith database into
  105. control + region databases by using silo annotations.
  106. This operation will not modify the original source database.
  107. """
  108. # We have a few tables that either need to be in both silos,
  109. # or only in control. These tables don't have silo annotations
  110. # as they are inherited from django and their silo assignments
  111. # need to be manually defined.
  112. region_tables = ["django_migrations", "django_content_type"]
  113. control_tables = [
  114. "django_migrations",
  115. "django_admin_log",
  116. "django_content_type",
  117. "django_site",
  118. "django_session",
  119. "auth_user",
  120. "auth_group",
  121. "auth_permission",
  122. "auth_group_permissions",
  123. "auth_user_groups",
  124. "auth_user_user_permissions",
  125. ]
  126. for model in apps.get_models():
  127. silo_limit = getattr(model._meta, "silo_limit", None)
  128. if not silo_limit:
  129. click.echo(f"> Could not find silo assignment for {model._meta.db_table}")
  130. continue
  131. if SiloMode.CONTROL in silo_limit.modes:
  132. control_tables.append(model._meta.db_table)
  133. if SiloMode.REGION in silo_limit.modes:
  134. region_tables.append(model._meta.db_table)
  135. revise_organization_mappings(legacy_region_name=legacy_region_name)
  136. split_database(control_tables, database, "control", reset=reset, verbose=verbose)
  137. split_database(region_tables, database, "region", reset=reset, verbose=verbose)
# Invoke the click command only when this file is executed as a script.
if __name__ == "__main__":
    main()