"""Partial port of sentry/tasks/assemble.py"""
- import hashlib
- import json
- import shutil
- import tempfile
- from enum import Enum
- from os import path
- from django.core.cache import cache
- from apps.organizations_ext.models import Organization
- from apps.releases.models import Release, ReleaseFile
- from sentry.utils.zip import safe_extract_zip
- from .exceptions import AssembleArtifactsError, AssembleChecksumMismatch
- from .models import File, FileBlob
# 2GB is the maximum offset supported by fileblob
MAX_FILE_SIZE = 2 * 1024**3
class ChunkFileState(Enum):
    """States reported for a file that is uploaded in chunks and assembled."""

    # The file is in the database.
    OK = "ok"
    # The file was not found in the database.
    NOT_FOUND = "not_found"
    # The file was created in the request and sent to the worker for assembling.
    CREATED = "created"
    # The file is still being processed by the worker.
    ASSEMBLING = "assembling"
    # An error happened during assembling.
    ERROR = "error"
class AssembleTask(Enum):
    """Kinds of assemble tasks whose status is tracked."""

    # Debug file upload
    DIF = "project.dsym"
    # Release file upload
    ARTIFACTS = "organization.artifacts"
def _get_cache_key(task, scope, checksum):
    """Build the cache key under which an assemble status is stored.

    ``task`` must be one of the ``AssembleTask`` values. ``scope`` can be the
    identifier of any model, such as the organization or project the task is
    performed under.

    ``checksum`` should be the SHA1 hash of the main file that is being
    assembled; it is encoded as ASCII, so non-ASCII input raises
    ``UnicodeEncodeError``.
    """
    # The checksum is deliberately rendered through ``str()`` of its ASCII
    # bytes (i.e. as ``b'...'``) so that generated keys stay unchanged.
    parts = (scope, checksum.encode("ascii"), task)
    raw_key = "|".join(map(str, parts))
    digest = hashlib.sha1(raw_key.encode()).hexdigest()
    return "assemble-status:" + digest
def set_assemble_status(
    task: AssembleTask, scope, checksum, state: ChunkFileState, detail=None
):
    """
    Record the current state of an assembling task in the cache.

    The ``(state, detail)`` pair is stored under the key derived from
    ``task``, ``scope`` and ``checksum`` with a timeout of 600 seconds
    (10 minutes).
    """
    status = (state, detail)
    cache.set(_get_cache_key(task, scope, checksum), status, 600)
def _load_release_manifest(bundle_file, scratchpad):
    """Extract the bundle into ``scratchpad`` and return its parsed manifest."""
    try:
        safe_extract_zip(bundle_file, scratchpad, strip_toplevel=False)
    except Exception as ex:
        raise AssembleArtifactsError("failed to extract bundle") from ex

    try:
        with open(path.join(scratchpad, "manifest.json"), "rb") as manifest_fp:
            return json.load(manifest_fp)
    except Exception as ex:
        raise AssembleArtifactsError("failed to open release manifest") from ex


def _resolve_bundle_path(scratchpad, rel_path):
    """Return the real path of ``rel_path``, which must stay inside ``scratchpad``."""
    root = path.realpath(scratchpad)
    full_path = path.realpath(path.join(root, rel_path))
    # The manifest comes from the uploaded bundle; an absolute path or one
    # containing ".." must not be allowed to read files outside of it.
    if path.commonpath([root, full_path]) != root:
        raise AssembleArtifactsError("artifact path escapes uploaded bundle")
    return full_path


def _store_release_artifact(release, scratchpad, rel_path, artifact):
    """Create the ``File`` for one manifest entry and attach it to ``release``."""
    artifact_url = artifact.get("url", rel_path)
    artifact_basename = artifact_url.rsplit("/", 1)[-1]
    # Validate the path first so that no File row is created for a bad entry.
    full_path = _resolve_bundle_path(scratchpad, rel_path)

    file = File.objects.create(
        name=artifact_basename,
        type="release.file",
        headers=artifact.get("headers", {}),
    )
    with open(full_path, "rb") as fp:
        file.putfile(fp)

    release_file, created = ReleaseFile.objects.get_or_create(
        release=release, name=artifact_url, defaults={"file": file}
    )
    if not created:
        # A release file with this name already exists: point it at the new
        # file and remove the one it referenced before.
        old_file = release_file.file
        release_file.file = file
        release_file.save(update_fields=["file"])
        old_file.delete()


def assemble_artifacts(organization, version, checksum, chunks):
    """
    Create release files from an uploaded artifact bundle.

    The chunks are assembled into a zip bundle which is extracted into a
    temporary directory. The bundle's ``manifest.json`` must name this
    organization's slug and ``version``; every entry of its ``files`` mapping
    is then stored as a ``ReleaseFile`` of the matching release.

    The assemble status is set to ``ASSEMBLING`` at the start and to ``OK`` on
    success. On failure it is set to ``ERROR`` and the exception is re-raised.
    The temporary directory and the bundle file are removed in either case.

    Returns ``None``; when ``assemble_file`` fails, nothing further is done.

    Raises:
        AssembleArtifactsError: the bundle cannot be extracted, the manifest
            cannot be read, the organization or release does not match, the
            release does not exist, or an artifact path leaves the bundle.
    """
    set_assemble_status(
        AssembleTask.ARTIFACTS, organization.pk, checksum, ChunkFileState.ASSEMBLING
    )

    # Assemble the chunks into a temporary file
    rv = assemble_file(
        AssembleTask.ARTIFACTS,
        organization,
        "release-artifacts.zip",
        checksum,
        chunks,
        file_type="release.bundle",
    )
    if rv is None:
        # assemble_file has already recorded the error status.
        return

    # NOTE(review): temp_file is never explicitly closed here - confirm
    # whether the caller is expected to close it.
    bundle, temp_file = rv

    scratchpad = tempfile.mkdtemp()
    try:
        manifest = _load_release_manifest(temp_file, scratchpad)

        if organization.slug != manifest.get("org"):
            raise AssembleArtifactsError("organization does not match uploaded bundle")

        release_name = manifest.get("release")
        if release_name != version:
            raise AssembleArtifactsError("release does not match uploaded bundle")

        try:
            release = organization.release_set.get(version=release_name)
        except Release.DoesNotExist as ex:
            raise AssembleArtifactsError("release does not exist") from ex

        # Sentry would add dist to release here

        for rel_path, artifact in manifest.get("files", {}).items():
            _store_release_artifact(release, scratchpad, rel_path, artifact)
    except AssembleArtifactsError as ex:
        set_assemble_status(
            AssembleTask.ARTIFACTS,
            organization.pk,
            checksum,
            ChunkFileState.ERROR,
            detail=str(ex),
        )
        raise
    except Exception:
        # Do not expose arbitrary exception messages through the status.
        set_assemble_status(
            AssembleTask.ARTIFACTS,
            organization.pk,
            checksum,
            ChunkFileState.ERROR,
            detail="internal server error",
        )
        raise
    else:
        set_assemble_status(
            AssembleTask.ARTIFACTS, organization.pk, checksum, ChunkFileState.OK
        )
    finally:
        # Always clean up, and still delete the bundle if removing the
        # scratch directory fails.
        try:
            shutil.rmtree(scratchpad)
        finally:
            bundle.delete()
def assemble_file(
    task: AssembleTask,
    organization: Organization,
    name: str,
    checksum,
    chunks,
    file_type,
):
    """
    Verifies and assembles a file model from chunks.

    Looks up the ``FileBlob`` rows for ``chunks``, checks the total size and
    that every chunk is available, then creates a ``File`` and assembles it
    from the blobs in the order (and with the duplication) in which the
    chunks were transmitted. The complete content is verified against
    ``checksum`` during assembly.

    Returns a tuple ``(File, TempFile)`` on success. On error the assemble
    status for ``task`` is set to ``ERROR`` with a detail message and ``None``
    is returned.
    """

    def fail(detail):
        # Scope by ``pk`` so the key matches the one used by callers such as
        # ``assemble_artifacts``.
        set_assemble_status(
            task,
            organization.pk,
            checksum,
            ChunkFileState.ERROR,
            detail=detail,
        )

    # Load all FileBlobs from db since we can be sure here we already own all
    # chunks needed to build the file
    blob_rows = FileBlob.objects.filter(checksum__in=chunks).values_list(
        "id", "checksum", "size"
    )
    ids_by_checksum = {}
    size_by_checksum = {}
    for blob_id, blob_checksum, blob_size in blob_rows:
        ids_by_checksum[blob_checksum] = blob_id
        size_by_checksum[blob_checksum] = blob_size

    # Reject all files that exceed the maximum allowed size. The size is
    # summed over the transmitted chunk list so that a chunk sent more than
    # once is counted as often as it ends up in the assembled file.
    file_size = sum(size_by_checksum.get(chunk, 0) for chunk in chunks)
    if file_size > MAX_FILE_SIZE:
        fail("File exceeds maximum size")
        return None

    # Sanity check. In case not all blobs exist at this point we have a
    # race condition.
    if set(ids_by_checksum) != set(chunks):
        fail("Not all chunks available for assembling")
        return None

    # Ensure blobs are in the order and duplication in which they were
    # transmitted. Otherwise, we would assemble the file in the wrong order.
    file_blob_ids = [ids_by_checksum[chunk] for chunk in chunks]

    file = File.objects.create(name=name, checksum=checksum, type=file_type)
    try:
        temp_file = file.assemble_from_file_blob_ids(file_blob_ids, checksum)
    except AssembleChecksumMismatch:
        file.delete()
        fail("Reported checksum mismatch")
        return None

    file.save()
    return file, temp_file
|