split-silo-database 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
  1. #!/usr/bin/env python
  2. from typing import List
  3. import click
  4. import docker
  5. from django.apps import apps
  6. from sentry.runner import configure
  7. from sentry.silo.base import SiloMode
  8. configure()
  9. from django.conf import settings
  10. from sentry.models.organizationmapping import OrganizationMapping
  def exec_run(container, command):
      """Run ``command`` through ``sh -c`` inside ``container`` and return its output.

      Args:
          container: Object exposing ``exec_run(cmd=..., stdout=..., stderr=...)``
              that returns an ``(exit_code, output)`` pair.
          command: Sequence of strings that are joined with single spaces to form
              the shell command line. Shell operators such as ``>`` and ``<`` may
              appear as elements; they are interpreted by ``sh``.

      Returns:
          The output returned by the container, unchanged. A non-zero exit code
          is reported with ``click.echo`` but does not raise, so callers keep
          running after a failed step.
      """
      script = " ".join(command)
      # Hand the container an argv list rather than a single 'sh -c "<script>"'
      # string: a double quote inside the script would otherwise terminate the
      # wrapper early when the string form is split into arguments.
      exit_code, output = container.exec_run(cmd=["sh", "-c", script], stdout=True, stderr=True)
      if exit_code:
          click.echo("Container operation Failed!")
          # Decode for display so the message is not printed as a bytes repr.
          detail = output.decode("utf8", errors="replace") if isinstance(output, bytes) else output
          click.echo(f"Container operation failed with {detail}")
      return output
  def split_database(tables: List[str], source: str, destination: str, reset: bool, verbose: bool):
      """Copy ``tables`` from the ``source`` database into the ``destination`` database.

      All commands are executed with ``exec_run`` inside the Docker container
      named ``sentry_postgres``, connecting as the ``postgres`` user. The tables
      are dumped with ``pg_dump --clean`` to ``/tmp/<destination>-tables.sql``
      in the container and then loaded into ``destination`` with ``psql``.

      Args:
          tables: Table names passed to ``pg_dump`` via ``-t``.
          source: Name of the database to dump from.
          destination: Name of the database to load into.
          reset: When true, drop and recreate ``destination`` before loading.
              When ``destination`` is ``"region"`` this also copies the
              ``sentry_increment_project_counter`` function from ``source``.
          verbose: When true, echo the dump and import command lines.
      """
      # Local import so this block brings its own dependency into scope.
      import shlex

      dump_file = f"/tmp/{destination}-tables.sql"

      click.echo(f">> Dumping tables from {source} database")
      # Every value spliced into the shell line is quoted; only the redirect
      # operators are left bare so that sh still interprets them.
      command = ["pg_dump", "-U", "postgres", "-d", shlex.quote(source), "--clean"]
      for table in tables:
          command.extend(["-t", shlex.quote(table)])
      command.extend([">", shlex.quote(dump_file)])

      client = docker.from_env()
      postgres = client.containers.get("sentry_postgres")

      if verbose:
          click.echo(f">> Running {' '.join(command)}")
      exec_run(postgres, command)

      if reset:
          click.echo(f">> Dropping existing {destination} database")
          exec_run(
              postgres,
              ["dropdb", "-U", "postgres", "--if-exists", shlex.quote(destination)],
          )
          exec_run(postgres, ["createdb", "-U", "postgres", shlex.quote(destination)])

      # Use the dump file to build the destination database's tables.
      click.echo(f">> Building {destination} database from dump file")
      import_command = [
          "psql",
          "-U",
          "postgres",
          shlex.quote(destination),
          "<",
          shlex.quote(dump_file),
      ]
      if verbose:
          click.echo(f">> Running {' '.join(import_command)}")
      exec_run(postgres, import_command)

      if destination == "region" and reset:
          click.echo(">> Cloning stored procedures")
          # psql's \sf meta-command prints the function's source; that output
          # is then replayed against the destination database.
          function_dump = [
              "psql",
              "-U",
              "postgres",
              shlex.quote(source),
              "-c",
              shlex.quote("\\sf sentry_increment_project_counter"),
          ]
          function_sql = exec_run(postgres, function_dump)

          import_function = [
              "psql",
              "-U",
              "postgres",
              shlex.quote(destination),
              "-c",
              # Quote the whole function body as one shell argument instead of
              # hand-wrapping it in single quotes.
              shlex.quote(function_sql.decode("utf8")),
          ]
          exec_run(postgres, import_function)
  def revise_organization_mappings(legacy_region_name: str):
      """Point OrganizationMapping rows at the configured monolith region.

      Rows whose ``region_name`` equals ``legacy_region_name`` are updated to
      ``settings.SENTRY_MONOLITH_REGION``. When the two names are already equal
      nothing is changed and a hint about configuring the setting is printed.

      Args:
          legacy_region_name: The region name currently stored on the mappings
              that should be replaced.
      """
      if settings.SENTRY_MONOLITH_REGION == legacy_region_name:
          click.echo(
              "> No OrganizationMapping have been modified. Set 'SENTRY_MONOLITH_REGION' in sentry.conf.py to update monolith mappings."
          )
          return

      # QuerySet.update() returns the number of rows it matched, so there is no
      # need to load every row with len() just to report a count.
      record_count = OrganizationMapping.objects.filter(region_name=legacy_region_name).update(
          region_name=settings.SENTRY_MONOLITH_REGION
      )
      click.echo(
          f"> {record_count} OrganizationMapping record(s) have been updated from '{legacy_region_name}' to '{settings.SENTRY_MONOLITH_REGION}'"
      )
  @click.command()
  @click.option(
      "--legacy-region-name",
      default="--monolith--",
      help="Previous value of settings.SENTRY_MONOLITH_REGION to overwrite in organization mappings",
  )
  @click.option("--verbose", default=False, is_flag=True, help="Enable verbose logging")
  @click.option(
      "--reset",
      default=False,
      is_flag=True,
      help="Reset the target databases to be empty before loading extracted data and schema.",
  )
  @click.option("--database", default="sentry", help="Which database to derive splits from")
  def main(database: str, reset: bool, verbose: bool, legacy_region_name: str):
      """
      This is a development tool that can convert a monolith database into
      control + region databases by using silo annotations.
      This operation will not modify the original source database.
      """
      # Tables inherited from django carry no silo annotation, so their
      # placement (both silos, or control only) is seeded by hand here.
      control_table_names = [
          "django_migrations",
          "django_admin_log",
          "django_content_type",
          "django_site",
          "django_session",
          "auth_user",
          "auth_group",
          "auth_permission",
          "auth_group_permissions",
          "auth_user_groups",
          "auth_user_user_permissions",
      ]
      region_table_names = ["django_migrations", "django_content_type"]

      # Sort every annotated model's table into the silo(s) it is limited to;
      # a model may land in both lists.
      for model in apps.get_models():
          limit = getattr(model._meta, "silo_limit", None)
          if limit:
              if SiloMode.CONTROL in limit.modes:
                  control_table_names.append(model._meta.db_table)
              if SiloMode.REGION in limit.modes:
                  region_table_names.append(model._meta.db_table)
          else:
              click.echo(f"> Could not find silo assignment for {model._meta.db_table}")

      # Mappings are revised first, then each silo database is built in turn.
      revise_organization_mappings(legacy_region_name=legacy_region_name)
      for table_names, silo_database in (
          (control_table_names, "control"),
          (region_table_names, "region"),
      ):
          split_database(table_names, database, silo_database, reset=reset, verbose=verbose)
  # Invoke the click command only when this file is executed as a script.
  if __name__ == "__main__":
      main()