- """Partial port of sentry/tasks/assemble.py"""
- import hashlib
- import json
- import shutil
- import tempfile
- from enum import Enum
- from os import path
- from django.core.cache import cache
- from apps.organizations_ext.models import Organization
- from apps.releases.models import Release
- from apps.sourcecode.models import DebugSymbolBundle
- from sentry.utils.zip import safe_extract_zip
- from .exceptions import AssembleArtifactsError, AssembleChecksumMismatch
- from .models import File, FileBlob
- MAX_FILE_SIZE = 2**31 # 2GB is the maximum offset supported by fileblob


class ChunkFileState(Enum):
    OK = "ok"  # File in database
    NOT_FOUND = "not_found"  # File not found in database
    CREATED = "created"  # File was created in the request and sent to the worker for assembling
    ASSEMBLING = "assembling"  # File still being processed by a worker
    ERROR = "error"  # An error happened during assembling


class AssembleTask(Enum):
    DIF = "project.dsym"  # Debug file upload
    ARTIFACTS = "organization.artifacts"  # Release file upload


def _get_cache_key(task, scope, checksum):
    """Computes the cache key for assemble status.

    ``task`` must be one of the ``AssembleTask`` values. The scope can be the
    identifier of any model, such as the organization or project that this
    task is performed under.

    ``checksum`` should be the SHA1 hash of the main file that is being
    assembled.
    """
    # Interpolate the checksum as text; encoding it to bytes first would hash
    # its bytes repr ("b'...'") instead of the checksum itself.
    return (
        "assemble-status:%s"
        % hashlib.sha1(("%s|%s|%s" % (scope, checksum, task)).encode()).hexdigest()
    )
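

# Illustrative only (hypothetical values): the key produced above has the form
#     "assemble-status:" + sha1("<scope>|<checksum>|<task>").hexdigest()
# i.e. a fixed prefix followed by 40 hex characters.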


def set_assemble_status(
    task: AssembleTask, scope, checksum, state: ChunkFileState, detail=None
):
    """
    Updates the status of an assembling task. It is cached for 10 minutes.
    """
    cache_key = _get_cache_key(task, scope, checksum)
    cache.set(cache_key, (state, detail), 600)
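

# Read-side counterpart to ``set_assemble_status``. Not part of this partial
# port; the sketch below mirrors the upstream Sentry helper and is shown for
# the usage example at the end of the module.
def get_assemble_status(task: AssembleTask, scope, checksum):
    """
    Returns the current ``(state, detail)`` pair for an assemble task, or
    ``(None, None)`` if nothing is cached (unknown checksum or expired entry).
    """
    cache_key = _get_cache_key(task, scope, checksum)
    rv = cache.get(cache_key)
    if rv is None:
        return None, None
    return rv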


def assemble_artifacts(
    organization: Organization, version: str | None, checksum: str, chunks: list[str]
):
    set_assemble_status(
        AssembleTask.ARTIFACTS, organization.pk, checksum, ChunkFileState.ASSEMBLING
    )

    # Assemble the chunks into a temporary file
    rv = assemble_file(
        AssembleTask.ARTIFACTS,
        organization,
        "release-artifacts.zip",
        checksum,
        chunks,
        file_type="release.bundle",
    )
    if rv is None:
        return
    bundle, temp_file = rv

    scratchpad = tempfile.mkdtemp()
    try:
        try:
            safe_extract_zip(temp_file, scratchpad, strip_toplevel=False)
        except BaseException as ex:
            raise AssembleArtifactsError("failed to extract bundle") from ex

        try:
            manifest_path = path.join(scratchpad, "manifest.json")
            with open(manifest_path, "rb") as manifest_file:
                manifest = json.load(manifest_file)
        except BaseException as ex:
            raise AssembleArtifactsError("failed to open release manifest") from ex

        if organization.slug != manifest.get("org"):
            raise AssembleArtifactsError("organization does not match uploaded bundle")

        release_name = manifest.get("release")
        if release_name != version:
            raise AssembleArtifactsError("release does not match uploaded bundle")

        release: Release | None = None
        if release_name:
            release, _ = Release.objects.get_or_create(
                organization=organization, version=release_name
            )
        # Sentry OSS would add dist to release here

        artifacts = manifest.get("files", {})
        files: list[File] = []
        for rel_path, artifact in artifacts.items():
            artifact_url = artifact.get("url", rel_path)
            artifact_basename = artifact_url.rsplit("/", 1)[-1]
            headers = artifact.get("headers", {})
            file = File.objects.create(
                name=artifact_basename,
                type=artifact["type"],
                headers=headers,
            )
            files.append(file)
            full_path = path.join(scratchpad, rel_path)
            with open(full_path, "rb") as fp:
                file.putfile(fp)

        # Pair each minified source with its source map, matching either the
        # "sourcemap" header (which names the map file) or a shared debug-id.
        bundles: list[DebugSymbolBundle] = []
        for file in files:
            if file.type == "minified_source":
                try:
                    sourcemap_file = next(
                        value
                        for value in files
                        if value.type == "source_map"
                        and (
                            file.headers.get("sourcemap", file.headers.get("Sourcemap"))
                            == value.name
                            or (
                                value.headers.get("debug-id")
                                and value.headers.get("debug-id")
                                == file.headers.get("debug-id")
                            )
                        )
                    )
                except StopIteration:
                    sourcemap_file = None
                if sourcemap_file:
                    bundles.append(
                        DebugSymbolBundle(
                            organization=organization,
                            debug_id=file.headers.get("debug-id"),
                            release=release,
                            sourcemap_file=sourcemap_file,
                            file=file,
                        )
                    )
        DebugSymbolBundle.objects.bulk_create(
            bundles,
            ignore_conflicts=True,
            # unique_fields=["organization", "debug_id", "release"],
            # update_fields=["file", "sourcemap_file"],
        )
        # May need to re-add this logic, but in bulk:
        # if not created:
        #     old_file = release_file.file
        #     release_file.file = file
        #     release_file.save(update_fields=["file"])
        #     old_file.delete()

        set_assemble_status(
            AssembleTask.ARTIFACTS, organization.pk, checksum, ChunkFileState.OK
        )
    finally:
        # Clean up the scratch directory and the temporary bundle even when
        # validation fails, so failed uploads do not leak disk space.
        shutil.rmtree(scratchpad)
        bundle.delete()
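

# For reference: the uploaded bundle is a zip whose root contains a
# ``manifest.json`` plus the artifact files at the paths the manifest lists.
# The shape below is illustrative, inferred from the keys read above ("org",
# "release", and "files" with per-file "url", "type", and "headers"); all
# values are made up.
#
#     {
#         "org": "my-org",
#         "release": "my-app@1.2.3",
#         "files": {
#             "files/app.min.js": {
#                 "url": "~/static/app.min.js",
#                 "type": "minified_source",
#                 "headers": {"sourcemap": "app.min.js.map"}
#             },
#             "files/app.min.js.map": {
#                 "url": "~/static/app.min.js.map",
#                 "type": "source_map",
#                 "headers": {}
#             }
#         }
#     }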


def assemble_file(
    task: AssembleTask,
    organization: Organization,
    name: str,
    checksum,
    chunks,
    file_type,
):
    """
    Verifies and assembles a file model from chunks.

    This downloads all chunks from blob store to verify their integrity and
    associates them with a created file model. Additionally, it assembles the
    full file in a temporary location and verifies the complete content hash.

    Returns a tuple ``(File, TempFile)`` on success, or ``None`` on error.
    """
    # Load all FileBlobs from the db, since at this point we can be sure we
    # already own all chunks needed to build the file.
    file_blobs = FileBlob.objects.filter(checksum__in=chunks).values_list(
        "id", "checksum", "size"
    )

    # Reject files that exceed the maximum allowed size. This is a hard limit
    # imposed by the maximum blob offset (see MAX_FILE_SIZE above).
    file_size = sum(x[2] for x in file_blobs)
    if file_size > MAX_FILE_SIZE:
        set_assemble_status(
            task,
            organization.id,
            checksum,
            ChunkFileState.ERROR,
            detail="File exceeds maximum size",
        )
        return

    # Sanity check. If not all blobs exist at this point, we have a race
    # condition.
    if {x[1] for x in file_blobs} != set(chunks):
        set_assemble_status(
            task,
            organization.id,
            checksum,
            ChunkFileState.ERROR,
            detail="Not all chunks available for assembling",
        )
        return

    # Put the blobs in the same order (including duplicates) in which they
    # were transmitted; otherwise the file would be assembled out of order.
    ids_by_checksum = {chks: blob_id for blob_id, chks, _ in file_blobs}
    file_blob_ids = [ids_by_checksum[c] for c in chunks]

    file = File.objects.create(name=name, checksum=checksum, type=file_type)
    try:
        temp_file = file.assemble_from_file_blob_ids(file_blob_ids, checksum)
    except AssembleChecksumMismatch:
        file.delete()
        set_assemble_status(
            task,
            organization.id,
            checksum,
            ChunkFileState.ERROR,
            detail="Reported checksum mismatch",
        )
    else:
        file.save()
        return file, temp_file
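

# Example end-to-end flow, as a hypothetical chunk-upload endpoint might drive
# it (``org`` and all checksums are placeholders; the chunks are assumed to
# have been uploaded as FileBlobs already):
#
#     assemble_artifacts(
#         organization=org,
#         version="my-app@1.2.3",
#         checksum="<sha1 of the whole zip>",
#         chunks=["<sha1 of chunk 0>", "<sha1 of chunk 1>"],
#     )
#     state, detail = get_assemble_status(
#         AssembleTask.ARTIFACTS, org.pk, "<sha1 of the whole zip>"
#     )
#     # state is ChunkFileState.OK on success; failures either set
#     # ChunkFileState.ERROR with a detail message or raise
#     # AssembleArtifactsError from assemble_artifacts itself.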