assemble.py 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. """ Partial port of sentry/tasks/assemble.py """
  2. import hashlib
  3. import json
  4. import shutil
  5. import tempfile
  6. from enum import Enum
  7. from os import path
  8. from django.core.cache import cache
  9. from django.core.files import File
  10. from django.db import IntegrityError, transaction
  11. from organizations_ext.models import Organization
  12. from releases.models import Release, ReleaseFile
  13. from sentry.utils.zip import safe_extract_zip
  14. from .exceptions import AssembleArtifactsError, AssembleChecksumMismatch
  15. from .models import File, FileBlob
  16. MAX_FILE_SIZE = 2**31 # 2GB is the maximum offset supported by fileblob
  17. class ChunkFileState(Enum):
  18. OK = "ok" # File in database
  19. NOT_FOUND = "not_found" # File not found in database
  20. CREATED = "created" # File was created in the request and send to the worker for assembling
  21. ASSEMBLING = "assembling" # File still being processed by worker
  22. ERROR = "error" # Error happened during assembling
  23. class AssembleTask(Enum):
  24. DIF = "project.dsym" # Debug file upload
  25. ARTIFACTS = "organization.artifacts" # Release file upload
  26. def _get_cache_key(task, scope, checksum):
  27. """Computes the cache key for assemble status.
  28. ``task`` must be one of the ``AssembleTask`` values. The scope can be the
  29. identifier of any model, such as the organization or project that this task
  30. is performed under.
  31. ``checksum`` should be the SHA1 hash of the main file that is being
  32. assembled.
  33. """
  34. return (
  35. "assemble-status:%s"
  36. % hashlib.sha1(
  37. ("%s|%s|%s" % (scope, checksum.encode("ascii"), task)).encode()
  38. ).hexdigest()
  39. )
  40. def set_assemble_status(
  41. task: AssembleTask, scope, checksum, state: ChunkFileState, detail=None
  42. ):
  43. """
  44. Updates the status of an assembling task. It is cached for 10 minutes.
  45. """
  46. cache_key = _get_cache_key(task, scope, checksum)
  47. cache.set(cache_key, (state, detail), 600)
  48. def assemble_artifacts(organization, version, checksum, chunks):
  49. set_assemble_status(
  50. AssembleTask.ARTIFACTS, organization.pk, checksum, ChunkFileState.ASSEMBLING
  51. )
  52. # Assemble the chunks into a temporary file
  53. rv = assemble_file(
  54. AssembleTask.ARTIFACTS,
  55. organization,
  56. "release-artifacts.zip",
  57. checksum,
  58. chunks,
  59. file_type="release.bundle",
  60. )
  61. if rv is None:
  62. return
  63. bundle, temp_file = rv
  64. scratchpad = tempfile.mkdtemp()
  65. try:
  66. safe_extract_zip(temp_file, scratchpad, strip_toplevel=False)
  67. except BaseException as ex:
  68. raise AssembleArtifactsError("failed to extract bundle") from ex
  69. try:
  70. manifest_path = path.join(scratchpad, "manifest.json")
  71. with open(manifest_path, "rb") as manifest:
  72. manifest = json.loads(manifest.read())
  73. except BaseException as ex:
  74. raise AssembleArtifactsError("failed to open release manifest") from ex
  75. if organization.slug != manifest.get("org"):
  76. raise AssembleArtifactsError("organization does not match uploaded bundle")
  77. release_name = manifest.get("release")
  78. if release_name != version:
  79. raise AssembleArtifactsError("release does not match uploaded bundle")
  80. try:
  81. release = organization.release_set.get(version=release_name)
  82. except Release.DoesNotExist as ex:
  83. raise AssembleArtifactsError("release does not exist") from ex
  84. # Sentry would add dist to release here
  85. artifacts = manifest.get("files", {})
  86. for rel_path, artifact in artifacts.items():
  87. artifact_url = artifact.get("url", rel_path)
  88. artifact_basename = artifact_url.rsplit("/", 1)[-1]
  89. file = File.objects.create(
  90. name=artifact_basename,
  91. type="release.file",
  92. headers=artifact.get("headers", {}),
  93. )
  94. full_path = path.join(scratchpad, rel_path)
  95. with open(full_path, "rb") as fp:
  96. file.putfile(fp)
  97. kwargs = {
  98. "organization_id": organization.id,
  99. "release": release,
  100. "name": artifact_url,
  101. # "dist": dist,
  102. }
  103. release_file, created = ReleaseFile.objects.get_or_create(
  104. release=release, name=artifact_url, defaults={"file": file}
  105. )
  106. if not created:
  107. old_file = release_file.file
  108. release_file.file = file
  109. release_file.save(update_fields=["file"])
  110. old_file.delete()
  111. set_assemble_status(
  112. AssembleTask.ARTIFACTS, organization.pk, checksum, ChunkFileState.OK
  113. )
  114. shutil.rmtree(scratchpad)
  115. bundle.delete()
  116. def assemble_file(
  117. task: AssembleTask,
  118. organization: Organization,
  119. name: str,
  120. checksum,
  121. chunks,
  122. file_type,
  123. ):
  124. """
  125. Verifies and assembles a file model from chunks.
  126. This downloads all chunks from blob store to verify their integrity and
  127. associates them with a created file model. Additionally, it assembles the
  128. full file in a temporary location and verifies the complete content hash.
  129. Returns a tuple ``(File, TempFile)`` on success, or ``None`` on error.
  130. """
  131. # Load all FileBlobs from db since we can be sure here we already own all
  132. # chunks need to build the file
  133. file_blobs = FileBlob.objects.filter(checksum__in=chunks).values_list(
  134. "id", "checksum", "size"
  135. )
  136. # Reject all files that exceed the maximum allowed size for this
  137. # organization. This value cannot be
  138. file_size = sum(x[2] for x in file_blobs)
  139. if file_size > MAX_FILE_SIZE:
  140. set_assemble_status(
  141. task,
  142. organization.id,
  143. checksum,
  144. ChunkFileState.ERROR,
  145. detail="File exceeds maximum size",
  146. )
  147. return
  148. # Sanity check. In case not all blobs exist at this point we have a
  149. # race condition.
  150. if set(x[1] for x in file_blobs) != set(chunks):
  151. set_assemble_status(
  152. task,
  153. organization.id,
  154. checksum,
  155. ChunkFileState.ERROR,
  156. detail="Not all chunks available for assembling",
  157. )
  158. return
  159. # Ensure blobs are in the order and duplication in which they were
  160. # transmitted. Otherwise, we would assemble the file in the wrong order.
  161. ids_by_checksum = {chks: id for id, chks, _ in file_blobs}
  162. file_blob_ids = [ids_by_checksum[c] for c in chunks]
  163. file = File.objects.create(name=name, checksum=checksum, type=file_type)
  164. try:
  165. temp_file = file.assemble_from_file_blob_ids(file_blob_ids, checksum)
  166. except AssembleChecksumMismatch:
  167. file.delete()
  168. set_assemble_status(
  169. task,
  170. organization.id,
  171. checksum,
  172. ChunkFileState.ERROR,
  173. detail="Reported checksum mismatch",
  174. )
  175. else:
  176. file.save()
  177. return file, temp_file