split-silo-database 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
#!/usr/bin/env python
import click
import docker
from django.apps import apps

from sentry.runner import configure
from sentry.silo.base import SiloMode

# NOTE(review): configure() is called before the settings and model imports
# below; presumably those imports need the Sentry/Django configuration to be
# loaded first -- confirm before reordering these lines.
configure()

from django.conf import settings

from sentry.models.organizationmapping import OrganizationMapping
  10. def exec_run(container, command):
  11. wrapped_command = f'sh -c "{" ".join(command)}"'
  12. exit_code, output = container.exec_run(cmd=wrapped_command, stdout=True, stderr=True)
  13. if exit_code:
  14. click.echo("Container operation Failed!")
  15. click.echo(f"Container operation failed with {output}")
  16. return output
  17. def split_database(tables: list[str], source: str, destination: str, reset: bool, verbose: bool):
  18. click.echo(f">> Dumping tables from {source} database")
  19. command = ["pg_dump", "-U", "postgres", "-d", source, "--clean"]
  20. for table in tables:
  21. command.extend(["-t", table])
  22. command.extend([">", f"/tmp/{destination}-tables.sql"])
  23. client = docker.from_env()
  24. postgres = client.containers.get("sentry_postgres")
  25. if verbose:
  26. click.echo(f">> Running {' '.join(command)}")
  27. exec_run(postgres, command)
  28. if reset:
  29. click.echo(f">> Dropping existing {destination} database")
  30. exec_run(postgres, ["dropdb", "-U", "postgres", "--if-exists", destination])
  31. exec_run(postgres, ["createdb", "-U", "postgres", destination])
  32. citext_command = [
  33. "psql",
  34. "-U",
  35. "postgres",
  36. destination,
  37. "-c",
  38. "'CREATE EXTENSION IF NOT EXISTS citext'",
  39. ]
  40. if verbose:
  41. click.echo(f">> RUNNING: {' '.join(citext_command)}")
  42. exec_run(postgres, citext_command)
  43. # Use the dump file to build control silo tables.
  44. click.echo(f">> Building {destination} database from dump file")
  45. import_command = ["psql", "-U", "postgres", destination, "<", f"/tmp/{destination}-tables.sql"]
  46. if verbose:
  47. click.echo(f">> Running {' '.join(import_command)}")
  48. exec_run(postgres, import_command)
  49. if destination == "region" and reset:
  50. click.echo(">> Cloning stored procedures")
  51. function_dump = [
  52. "psql",
  53. "-U",
  54. "postgres",
  55. source,
  56. "-c",
  57. "'\\sf sentry_increment_project_counter'",
  58. ]
  59. function_sql = exec_run(postgres, function_dump)
  60. import_function = [
  61. "psql",
  62. "-U",
  63. "postgres",
  64. destination,
  65. "-c",
  66. "'" + function_sql.decode("utf8") + "'",
  67. ]
  68. exec_run(postgres, import_function)
  69. def revise_organization_mappings(legacy_region_name: str):
  70. if settings.SENTRY_MONOLITH_REGION == legacy_region_name:
  71. click.echo(
  72. "> No OrganizationMapping have been modified. Set 'SENTRY_MONOLITH_REGION' in sentry.conf.py to update monolith mappings."
  73. )
  74. else:
  75. qs = OrganizationMapping.objects.filter(region_name=legacy_region_name)
  76. record_count = len(qs)
  77. qs.update(region_name=settings.SENTRY_MONOLITH_REGION)
  78. click.echo(
  79. f"> {record_count} OrganizationMapping record(s) have been updated from '{legacy_region_name}' to '{settings.SENTRY_MONOLITH_REGION}'"
  80. )
  81. @click.command()
  82. @click.option(
  83. "--legacy-region-name",
  84. default="--monolith--",
  85. help="Previous value of settings.SENTRY_MONOLITH_REGION to overwrite in organization mappings",
  86. )
  87. @click.option("--verbose", default=False, is_flag=True, help="Enable verbose logging")
  88. @click.option(
  89. "--reset",
  90. default=False,
  91. is_flag=True,
  92. help="Reset the target databases to be empty before loading extracted data and schema.",
  93. )
  94. @click.option("--database", default="sentry", help="Which database to derive splits from")
  95. def main(database: str, reset: bool, verbose: bool, legacy_region_name: str):
  96. """
  97. This is a development tool that can convert a monolith database into
  98. control + region databases by using silo annotations.
  99. This operation will not modify the original source database.
  100. """
  101. # We have a few tables that either need to be in both silos,
  102. # or only in control. These tables don't have silo annotations
  103. # as they are inherited from django and their silo assignments
  104. # need to be manually defined.
  105. region_tables = ["django_migrations", "django_content_type"]
  106. control_tables = [
  107. "django_migrations",
  108. "django_admin_log",
  109. "django_content_type",
  110. "django_site",
  111. "django_session",
  112. "auth_user",
  113. "auth_group",
  114. "auth_permission",
  115. "auth_group_permissions",
  116. "auth_user_groups",
  117. "auth_user_user_permissions",
  118. ]
  119. for model in apps.get_models():
  120. silo_limit = getattr(model._meta, "silo_limit", None)
  121. if not silo_limit:
  122. click.echo(f"> Could not find silo assignment for {model._meta.db_table}")
  123. continue
  124. if SiloMode.CONTROL in silo_limit.modes:
  125. control_tables.append(model._meta.db_table)
  126. if SiloMode.REGION in silo_limit.modes:
  127. region_tables.append(model._meta.db_table)
  128. revise_organization_mappings(legacy_region_name=legacy_region_name)
  129. split_database(control_tables, database, "control", reset=reset, verbose=verbose)
  130. split_database(region_tables, database, "region", reset=reset, verbose=verbose)
# Invoke the click command only when this file is executed as a script.
if __name__ == "__main__":
    main()