netdata-pkgcloud-cleanup.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. #!/bin/env python3
  2. import requests
  3. from requests.auth import HTTPBasicAuth
  4. from datetime import date, datetime, timedelta
  5. import os
  6. import sys
  7. import argparse
  8. from pprint import pprint
  9. from datetime import datetime
  10. from dateutil import parser
  11. class PackageCloud:
  12. NUM_PACKAGE_MINOR_TO_KEEP = 5
  13. NUM_RETENTION_DAYS = 30
  14. # number of pages to process. Use '0' to process all
  15. MAX_PAGES = 0
  16. def __init__(self, repo_type, dry_run=True, auth_token=None):
  17. self.headers = {
  18. "Accept" : "application/json",
  19. "Content-Type" : "application/json",
  20. }
  21. self.dry_run = dry_run
  22. self.repo_type = repo_type
  23. if repo_type == "stable":
  24. repo = "netdata/netdata"
  25. elif repo_type == "devel":
  26. repo = "netdata/netdata-devel"
  27. elif repo_type == "edge":
  28. repo = "netdata/netdata-edge"
  29. else:
  30. print(f"ERROR: unknown repo type '{repo_type}'!\nAccepted values are: stable,devel,edge")
  31. sys.exit(1)
  32. self.base_url = f"https://packagecloud.io/api/v1/repos/{repo}"
  33. self.auth = HTTPBasicAuth(username=auth_token, password='') if auth_token else None
  34. def get_all_packages(self):
  35. page = 1
  36. all_pkg_list = []
  37. while True:
  38. url = f"{self.base_url}/packages.json?page={page}"
  39. if page > self.MAX_PAGES and self.MAX_PAGES != 0:
  40. break
  41. else:
  42. pkg_list = requests.get(url, auth=self.auth, headers=self.headers).json()
  43. if len(pkg_list) == 0:
  44. break
  45. else:
  46. print(f"Processing page: {page}")
  47. for element in pkg_list:
  48. self.is_pkg_older_than_days(element, 30)
  49. if element['name'] != 'netdata-repo' and element['name'] != 'netdata-repo-edge':
  50. all_pkg_list.append(element)
  51. page += 1
  52. return all_pkg_list
  53. def delete_package(self, destroy_url):
  54. if self.dry_run:
  55. print(f" - DRY_RUN mode. Not deleting package '{destroy_url}'.")
  56. else:
  57. print(f" - Deleting package: {destroy_url}")
  58. url = f"https://packagecloud.io{destroy_url}"
  59. response = requests.delete(url, auth=self.auth, headers=self.headers).json()
  60. response = None
  61. if not response:
  62. print(f" Package deleted successfully.")
  63. else:
  64. print(f" Failed deleting package!")
  65. def get_destroy_url(self, pkg_url):
  66. url = f"https://packagecloud.io{pkg_url}"
  67. response = requests.get(url, auth=self.auth, headers=self.headers)
  68. response.raise_for_status()
  69. return response.json()['destroy_url']
  70. def get_packages_for_distro(self, distro, all_pkg_list):
  71. distro_pkg_list = [ pkg for pkg in all_pkg_list if pkg['distro_version'] == distro ]
  72. return distro_pkg_list
  73. def get_packages_for_arch(self, arch, all_pkg_list):
  74. arch_pkg_list = [ pkg for pkg in all_pkg_list if pkg['package_url'].split('/')[11] == arch ]
  75. return arch_pkg_list
  76. def get_arches(self, pkg_list):
  77. arches = list(set([pkg['package_url'].split('/')[11] for pkg in pkg_list ]))
  78. return arches
  79. def get_pkg_list(self, pkg_name, pkg_list):
  80. filtered_list = [ pkg for pkg in pkg_list if pkg['name'] == pkg_name ]
  81. return filtered_list
  82. def get_minor_versions(self, all_versions):
  83. minor_versions = ['.'.join(version.split('.')[:-1]) for version in all_versions ]
  84. minor_versions = list(set(minor_versions))
  85. minor_versions.sort()
  86. return minor_versions
  87. def is_pkg_older_than_days(self, pkg, num_days):
  88. pkg_create_date = datetime.strptime(pkg['created_at'], '%Y-%m-%dT%H:%M:%S.%fZ')
  89. time_difference = datetime.now() - pkg_create_date
  90. return time_difference.days > num_days
  91. def cleanup_repo(self):
  92. if self.repo_type == 'stable':
  93. self.cleanup_stable_repo()
  94. else:
  95. self.cleanup_edge_repo()
  96. def cleanup_edge_repo(self):
  97. all_pkg_list = self.get_all_packages()
  98. pkgs_to_delete = []
  99. pkgs_to_keep = []
  100. for package in all_pkg_list:
  101. if self.is_pkg_older_than_days(package, self.NUM_RETENTION_DAYS):
  102. pkgs_to_delete.append(package)
  103. else:
  104. pkgs_to_keep.append(package)
  105. print(f"Keeping the following packages (newer than {self.NUM_RETENTION_DAYS} days):")
  106. for pkg in pkgs_to_keep:
  107. print(f" > pkg: {pkg['package_html_url']} / created_at: {pkg['created_at']}")
  108. print(f"Deleting the following packages (older than {self.NUM_RETENTION_DAYS} days):")
  109. for pkg in pkgs_to_delete:
  110. print(f" > pkg: {pkg['package_html_url']} / created_at: {pkg['created_at']}")
  111. self.delete_package(pkg['destroy_url'])
  112. def cleanup_stable_repo(self):
  113. all_pkg_list = self.get_all_packages()
  114. all_distros = list(set([ pkg['distro_version'] for pkg in all_pkg_list ]))
  115. all_distros = sorted(all_distros)
  116. print(f"<> Distributions list: {all_distros}")
  117. for distro in all_distros:
  118. print(f">> Processing distro: {distro}")
  119. pkg_list_distro = self.get_packages_for_distro(distro, all_pkg_list)
  120. arches = self.get_arches(pkg_list_distro)
  121. print(f" <> Arch list: {arches}")
  122. for arch in arches:
  123. print(f" >> Processing arch: {distro} -> {arch}")
  124. pkg_list_arch = self.get_packages_for_arch(arch, pkg_list_distro)
  125. pkg_names = [pkg['name'] for pkg in pkg_list_arch]
  126. pkg_names = list(set(pkg_names))
  127. print(f" <> Package names: {pkg_names}")
  128. for pkg_name in pkg_names:
  129. print(f" >> Processing package: {distro} -> {arch} -> {pkg_name}")
  130. pkg_list = self.get_pkg_list(pkg_name, pkg_list_arch)
  131. pkg_versions = [pkg['version'] for pkg in pkg_list]
  132. pkg_minor_versions = self.get_minor_versions(pkg_versions)
  133. pkg_minor_to_keep = pkg_minor_versions[-self.NUM_PACKAGE_MINOR_TO_KEEP:]
  134. print(f" <> Minor Package Versions to Keep: {pkg_minor_to_keep}")
  135. pkg_minor_to_delete = list(set(pkg_minor_versions) - set(pkg_minor_to_keep))
  136. print(f" <> Minor Package Versions to Delete: {pkg_minor_to_delete}")
  137. urls_to_keep = [pkg['package_url'] for pkg in pkg_list if '.'.join(pkg['version'].split('.')[:-1]) in pkg_minor_to_keep]
  138. urls_to_delete = [pkg['package_url'] for pkg in pkg_list if '.'.join(pkg['version'].split('.')[:-1]) in pkg_minor_to_delete]
  139. for pkg_url in urls_to_delete:
  140. destroy_url = self.get_destroy_url(pkg_url)
  141. self.delete_package(destroy_url)
  142. def configure():
  143. parser = argparse.ArgumentParser()
  144. parser.add_argument('--repo-type', '-r', required=True,
  145. help='Repository type against to perform cleanup')
  146. parser.add_argument('--dry-run', '-d', action='store_true',
  147. help='Dry-run Mode')
  148. args = parser.parse_args()
  149. try:
  150. token = os.environ['PKGCLOUD_TOKEN']
  151. except Exception as e:
  152. print(f"FATAL: 'PKGCLOUD_TOKEN' environment variable is not set!", file=sys.stderr)
  153. sys.exit(1)
  154. repo_type = args.repo_type
  155. dry_run = args.dry_run
  156. conf = {
  157. 'repo_type': args.repo_type,
  158. 'dry_run': args.dry_run,
  159. 'token': token
  160. }
  161. return conf
  162. def main():
  163. config = configure()
  164. pkg_cloud = PackageCloud(config['repo_type'], config['dry_run'], config['token'])
  165. pkg_cloud.cleanup_repo()
  166. if __name__ == "__main__":
  167. main()