#!/bin/bash
# backfill_cdc.sh
# Backfills all the CDC tables

# Abort on the first command that exits non-zero.
set -e

# Storage names passed, in this order, to `snuba bulk-load --storage=...`.
declare -a STORAGES=("groupedmessages" "groupassignees")
  5. log_message() {
  6. GREEN='\033[0;32m'
  7. NC='\033[0m'
  8. echo -e "${GREEN}${1}${NC}"
  9. }
  10. mkdir -p /tmp/cdc-snapshots/
  11. log_message "********* Taking the snapshot from Postgres *********"
  12. cd "$(dirname "$0")"
  13. docker run \
  14. -v "$(pwd)"/../config/cdc/configuration.yaml:/etc/cdc/configuration.yaml \
  15. -v "$(pwd)"/../config/cdc/cdc-snapshot-config.yaml:/etc/cdc/cdc-snapshot-config.yaml \
  16. -v /tmp/cdc-snapshots:/tmp/cdc-snapshots \
  17. --rm \
  18. --network sentry \
  19. ghcr.io/getsentry/cdc:latest \
  20. cdc -c /etc/cdc/configuration.yaml \
  21. snapshot --snapshot-config /etc/cdc/cdc-snapshot-config.yaml \
  22. 2>&1 | tee /tmp/cdc-snapshots/snapshot.log
  23. SNAPSHOT_ID=$(awk '{ if($4=="Starting" && $5=="snapshot" && $6=="ID") print $7}' /tmp/cdc-snapshots/snapshot.log )
  24. SNAPSHOT_PATH="/tmp/cdc-snapshots/cdc_snapshot_snuba_$SNAPSHOT_ID"
  25. rm /tmp/cdc-snapshots/snapshot.log
  26. log_message "********* Loading the snapshot into Snuba *********"
  27. for i in "${!STORAGES[@]}";
  28. do
  29. log_message "********* Loading ${STORAGES[$i]}"
  30. docker run \
  31. -v "$SNAPSHOT_PATH"/:/tmp/cdc-snapshot \
  32. --rm \
  33. --network sentry \
  34. -e SNUBA_SETTINGS=docker \
  35. -e CLICKHOUSE_HOST=sentry_clickhouse \
  36. getsentry/snuba:nightly \
  37. snuba bulk-load --storage="${STORAGES[$i]}" \
  38. --source=/tmp/cdc-snapshot \
  39. --ignore-existing-data \
  40. --pre-processed \
  41. --show-progress
  42. done
  43. log_message "********* Done *********"
  44. echo "You can now remove the snapshot from $SNAPSHOT_PATH"