
Implement PackageCloud cleanup (#13236)

* add script for netdata packagecloud cleanup

* add script for netdata packagecloud cleanup

* remove old package-cloud cleanup code

* add workflow for packagecloud cleanup

* Debug msg

* allow individual matrix jobs to complete independently of each other

* perform cleanup without dry-run

* remove debug msg

* remove cleanup line from the slack notification
maneamarius 2 years ago
parent
commit
2b5999c393

+ 190 - 0
.github/scripts/netdata-pkgcloud-cleanup.py

@@ -0,0 +1,190 @@
+#!/usr/bin/env python3
+
+import argparse
+import os
+import sys
+from datetime import datetime
+
+import requests
+from requests.auth import HTTPBasicAuth
+
+
+class PackageCloud:
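+    # how many of the newest minor version lines (X.Y) to keep per package/distro/arch (stable repo)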
+    NUM_PACKAGE_MINOR_TO_KEEP = 5
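+    # packages older than this many days are deleted (edge/devel repos)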
+    NUM_RETENTION_DAYS = 30
+    # number of pages to process. Use '0' to process all
+    MAX_PAGES = 0
+
+    def __init__(self, repo_type, dry_run=True, auth_token=None):
+        self.headers = {
+            "Accept" : "application/json",
+            "Content-Type" : "application/json",
+        }
+        self.dry_run = dry_run
+        self.repo_type = repo_type
+        if repo_type == "stable":
+            repo = "netdata/netdata"
+        elif repo_type == "devel":
+            repo = "netdata/netdata-devel"
+        elif repo_type == "edge":
+            repo = "netdata/netdata-edge"
+        else:
+            print(f"ERROR: unknown repo type '{repo_type}'!\nAccepted values are: stable,devel,edge")
+            sys.exit(1)
+        self.base_url = f"https://packagecloud.io/api/v1/repos/{repo}"
+        self.auth = HTTPBasicAuth(username=auth_token, password='') if auth_token else None
+
+    def get_all_packages(self):
+        page = 1
+        all_pkg_list = []
+        while True:
+            if self.MAX_PAGES != 0 and page > self.MAX_PAGES:
+                break
+            url = f"{self.base_url}/packages.json?page={page}"
+            pkg_list = requests.get(url, auth=self.auth, headers=self.headers).json()
+            if len(pkg_list) == 0:
+                break
+            print(f"Processing page: {page}")
+            for element in pkg_list:
+                # skip the repository-config packages, which are never deleted
+                if element['name'] not in ('netdata-repo', 'netdata-repo-edge'):
+                    all_pkg_list.append(element)
+            page += 1
+        return all_pkg_list
+
+    def delete_package(self, destroy_url):
+        if self.dry_run:
+            print(f"         - DRY_RUN mode. Not deleting package '{destroy_url}'.")
+        else:
+            print(f"         - Deleting package: {destroy_url}")
+            url = f"https://packagecloud.io{destroy_url}"
+            response = requests.delete(url, auth=self.auth, headers=self.headers).json()
+            # an empty JSON response body is treated as success
+            if not response:
+                print("           Package deleted successfully.")
+            else:
+                print(f"           Failed deleting package: {response}")
+
+    def get_destroy_url(self, pkg_url):
+        url = f"https://packagecloud.io{pkg_url}"
+        response = requests.get(url, auth=self.auth, headers=self.headers)
+        response.raise_for_status()
+        return response.json()['destroy_url']
+
+    def get_packages_for_distro(self, distro, all_pkg_list):
+        distro_pkg_list = [ pkg for pkg in all_pkg_list if pkg['distro_version'] == distro ]
+        return distro_pkg_list
+
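+    # NOTE: both helpers below assume the architecture is the 12th
+    # '/'-separated component of a package's 'package_url'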
+    def get_packages_for_arch(self, arch, all_pkg_list):
+        arch_pkg_list = [ pkg for pkg in all_pkg_list if pkg['package_url'].split('/')[11] == arch ]
+        return arch_pkg_list
+
+    def get_arches(self, pkg_list):
+        arches = list(set([pkg['package_url'].split('/')[11] for pkg in pkg_list ]))
+        return arches
+
+    def get_pkg_list(self, pkg_name, pkg_list):
+        filtered_list = [ pkg for pkg in pkg_list if pkg['name'] == pkg_name ]
+        return filtered_list
+
+    def get_minor_versions(self, all_versions):
+        minor_versions = {'.'.join(version.split('.')[:-1]) for version in all_versions}
+        # sort numerically where possible: a plain string sort would order '1.10' before '1.9'
+        return sorted(minor_versions, key=lambda v: [int(p) for p in v.split('.') if p.isdigit()])
+
+    def is_pkg_older_than_days(self, pkg, num_days):
+        # 'created_at' is a UTC timestamp (trailing 'Z'), so compare against UTC now
+        pkg_create_date = datetime.strptime(pkg['created_at'], '%Y-%m-%dT%H:%M:%S.%fZ')
+        time_difference = datetime.utcnow() - pkg_create_date
+        return time_difference.days > num_days
+
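+    # stable: keep only the newest minor version lines; devel/edge: drop packages older than the retention window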
+    def cleanup_repo(self):
+        if self.repo_type == 'stable':
+            self.cleanup_stable_repo()
+        else:
+            self.cleanup_edge_repo()
+
+    def cleanup_edge_repo(self):
+        all_pkg_list = self.get_all_packages()
+        pkgs_to_delete = []
+        pkgs_to_keep = []
+        for package in all_pkg_list:
+            if self.is_pkg_older_than_days(package, self.NUM_RETENTION_DAYS):
+                pkgs_to_delete.append(package)
+            else:
+                pkgs_to_keep.append(package)
+        print(f"Keeping the following packages (newer than {self.NUM_RETENTION_DAYS} days):")
+        for pkg in pkgs_to_keep:
+            print(f"    > pkg: {pkg['package_html_url']} / created_at: {pkg['created_at']}")
+        print(f"Deleting the following packages (older than {self.NUM_RETENTION_DAYS} days):")
+        for pkg in pkgs_to_delete:
+            print(f"    > pkg: {pkg['package_html_url']} / created_at: {pkg['created_at']}")
+            self.delete_package(pkg['destroy_url'])
+
+    def cleanup_stable_repo(self):
+        all_pkg_list = self.get_all_packages()
+        all_distros = list(set([ pkg['distro_version'] for pkg in all_pkg_list ]))
+        all_distros = sorted(all_distros)
+        print(f"<> Distributions list: {all_distros}")
+
+        for distro in all_distros:
+            print(f">> Processing distro: {distro}")
+            pkg_list_distro = self.get_packages_for_distro(distro, all_pkg_list)
+            arches = self.get_arches(pkg_list_distro)
+            print(f"   <> Arch list: {arches}")
+            for arch in arches:
+                print(f"   >> Processing arch: {distro} -> {arch}")
+                pkg_list_arch = self.get_packages_for_arch(arch, pkg_list_distro)
+                pkg_names = [pkg['name'] for pkg in pkg_list_arch]
+                pkg_names = list(set(pkg_names))
+                print(f"     <> Package names: {pkg_names}")
+                for pkg_name in pkg_names:
+                    print(f"     >> Processing package: {distro} -> {arch} -> {pkg_name}")
+                    pkg_list = self.get_pkg_list(pkg_name, pkg_list_arch)
+                    pkg_versions = [pkg['version'] for pkg in pkg_list]
+                    pkg_minor_versions = self.get_minor_versions(pkg_versions)
+                    pkg_minor_to_keep = pkg_minor_versions[-self.NUM_PACKAGE_MINOR_TO_KEEP:]
+                    print(f"       <> Minor Package Versions to Keep: {pkg_minor_to_keep}")
+                    pkg_minor_to_delete = list(set(pkg_minor_versions) - set(pkg_minor_to_keep))
+                    print(f"       <> Minor Package Versions to Delete: {pkg_minor_to_delete}")
+                    urls_to_keep = [pkg['package_url'] for pkg in pkg_list if '.'.join(pkg['version'].split('.')[:-1]) in pkg_minor_to_keep]
+                    urls_to_delete = [pkg['package_url'] for pkg in pkg_list if '.'.join(pkg['version'].split('.')[:-1]) in pkg_minor_to_delete]
+                    for pkg_url in urls_to_delete:
+                        destroy_url = self.get_destroy_url(pkg_url)
+                        self.delete_package(destroy_url)
+
+
+def configure():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--repo-type', '-r', required=True,
+                        help='Repository type to clean up (stable, devel or edge)')
+    parser.add_argument('--dry-run', '-d', action='store_true',
+                        help='Dry-run mode')
+    args = parser.parse_args()
+    try:
+        token = os.environ['PKGCLOUD_TOKEN']
+    except KeyError:
+        print("FATAL: 'PKGCLOUD_TOKEN' environment variable is not set!", file=sys.stderr)
+        sys.exit(1)
+    conf = {
+        'repo_type': args.repo_type,
+        'dry_run': args.dry_run,
+        'token': token
+    }
+    return conf
+
+
+def main():
+    config = configure()
+    pkg_cloud = PackageCloud(config['repo_type'], config['dry_run'], config['token'])
+    pkg_cloud.cleanup_repo()
+
+
+if __name__ == "__main__":
+    main()
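
A dry run of the new script can be exercised locally before the scheduled workflow runs it for real (a minimal sketch; the token value is a placeholder for a valid PackageCloud API token):

    export PKGCLOUD_TOKEN='<api-token>'
    python3 .github/scripts/netdata-pkgcloud-cleanup.py --repo-type edge --dry-run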

+ 0 - 88
.github/scripts/old_package_purging.sh

@@ -1,88 +0,0 @@
-#!/usr/bin/env bash
-#
-# Script to handle package cloud retention policy
-# Our open source subscription is limited,
-# so we use this script to control the number of packages maintained historically
-#
-# Dependencies:
-# - PACKAGE_CLOUD_RETENTION_DAYS
-#   This is to indicate for how many days back we want to maintain the various RPM and DEB packages on package cloud
-#
-# Copyright : SPDX-License-Identifier: GPL-3.0-or-later
-#
-# Author    : Pavlos Emm. Katsoulakis <paul@netdata.cloud>
-#
-set -e
-
-delete_files_for_version() {
-	local v="$1"
-
-	# Delete the selected filenames in version
-	FILES_IN_VERSION=$(jq --sort-keys --arg v "${v}" '.[] | select ( .version | contains($v))' "${PKG_LIST_FILE}" | grep filename | cut -d':' -f 2)
-
-	# Iterate through the files and delete them
-	for pkg in ${FILES_IN_VERSION/\\n/}; do
-		pkg=${pkg/,/}
-		pkg=${pkg/\"/}
-		pkg=${pkg/\"/}
-		echo "Attempting yank on ${pkg}.."
-		.github/scripts/package_cloud_wrapper.sh yank "${REPO}" "${pkg}" || echo "Nothing to yank or error on ${pkg}"
-	done
-}
-
-# If we are not in netdata git repo, at the top level directory, fail
-TOP_LEVEL=$(basename "$(git rev-parse --show-toplevel)")
-CWD=$(git rev-parse --show-cdup)
-if [ -n "$CWD" ] || [ ! "${TOP_LEVEL}" == "netdata" ]; then
-    echo "Run as .github/scripts/$(basename "$0") from top level directory of netdata git repository"
-    echo "Old packages yanking cancelled"
-    exit 1
-fi
-
-if [ -z "${REPO}" ]; then
-	echo "No REPO variable found"
-	exit 1
-fi
-
-if [ -z ${PKG_CLOUD_TOKEN} ]; then
-	echo "No PKG_CLOUD_TOKEN variable found"
-	exit 1
-fi
-
-if [ -z ${PACKAGE_CLOUD_RETENTION_DAYS} ]; then
-	echo "No PACKAGE_CLOUD_RETENTION_DAYS variable found"
-	exit 1
-fi
-
-TMP_DIR="$(mktemp -d /tmp/netdata-old-package-yanking-XXXXXX)"
-PKG_LIST_FILE="${TMP_DIR}/complete_package_list.json"
-DATE_EPOCH="1970-01-01"
-DATE_UNTIL_TO_DELETE=$(date --date="${PACKAGE_CLOUD_RETENTION_DAYS} day ago" +%Y-%m-%d)
-
-
-echo "Created temp directory: ${TMP_DIR}"
-echo "We will be purging contents up until ${DATE_UNTIL_TO_DELETE}"
-
-echo "Calling package could to retrieve all available packages on ${REPO}"
-curl -sS "https://${PKG_CLOUD_TOKEN}:@packagecloud.io/api/v1/repos/${REPO}/packages.json" > "${PKG_LIST_FILE}"
-
-# Get versions within the desired duration
-#
-VERSIONS_TO_PURGE=$(jq --arg s "${DATE_EPOCH}" --arg e "${DATE_UNTIL_TO_DELETE}" '
-[($s, $e) | strptime("%Y-%m-%d")[0:3]] as $r
-  | map(select(
-        (.created_at[:19] | strptime("%Y-%m-%dT%H:%M:%S")[0:3]) as $d
-          | $d >= $r[0] and $d <= $r[1]
-))' "${PKG_LIST_FILE}" | grep '"version":' | sort -u | sed -e 's/ //g' | cut -d':' -f2)
-
-echo "We will be deleting the following versions: ${VERSIONS_TO_PURGE}"
-for v in ${VERSIONS_TO_PURGE/\n//}; do
-	v=${v/\"/}
-	v=${v/\"/}
-	v=${v/,/}
-	echo "Remove all files for version $v"
-	delete_files_for_version "${v}"
-done
-
-# Done, clean up
-[ -d "${TMP_DIR}" ] && rm -rf "${TMP_DIR}"

+ 36 - 0
.github/workflows/packagecloud.yml

@@ -0,0 +1,36 @@
+---
+# Runs the PackageCloud cleanup every day at 21:00 UTC
+name: PackageCloud Cleanup
+on:
+  schedule:
+    - cron: '0 21 * * *'
+  workflow_dispatch: null
+
+jobs:
+  cleanup:
+    name: PackageCloud Cleanup
+    runs-on: ubuntu-latest
+    if: always()
+    strategy:
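+      # let each repository's cleanup finish even if another one fails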
+      fail-fast: false
+      matrix:
+        repos:
+          - stable
+          - edge
+          - devel
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        id: checkout
+        with:
+          submodules: recursive
+      - name: Prepare environment
+        id: prepare
+        run: |
+          pip3 install requests
+      - name: Run PackageCloud Cleanup
+        id: cleanup
+        env:
+          PKGCLOUD_TOKEN: ${{ secrets.PACKAGE_CLOUD_API_KEY }}
+        run: |
+          python3 .github/scripts/netdata-pkgcloud-cleanup.py -r ${{ matrix.repos }}
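
Besides the daily schedule, the workflow_dispatch trigger makes the cleanup runnable on demand, for example via the GitHub CLI (assuming gh is installed and authenticated for the repository):

    gh workflow run packagecloud.yml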

+ 0 - 10
.github/workflows/packaging.yml

@@ -227,15 +227,6 @@ jobs:
             "$(basename "${pkgfile}")" || true
             .github/scripts/package_cloud_wrapper.sh push ${{ needs.version-check.outputs.repo }}/${{ matrix.pkgclouddistro }} "${pkgfile}"
           done
-      - name: Clean
-        id: cleanup
-        if: github.event_name == 'workflow_dispatch'
-        shell: bash
-        env:
-          REPO: ${{ needs.version-check.outputs.repo }}
-          PKG_CLOUD_TOKEN: ${{ secrets.PACKAGE_CLOUD_API_KEY }}
-          PACKAGE_CLOUD_RETENTION_DAYS: ${{ needs.version-check.outputs.retention }}
-        run: .github/scripts/old_package_purging.sh
       - name: Failure Notification
         uses: rtCamp/action-slack-notify@v2
         env:
@@ -252,7 +243,6 @@ jobs:
               Build: ${{ steps.build.outcome }}
               Test: ${{ steps.test.outcome }}
               Publish: ${{ steps.upload.outcome }}
-              Cleanup: ${{ steps.cleanup.outcome }}
           SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }}
         if: >-
           ${{