  1. """ Partial port of sentry/tasks/assemble.py """
  2. import hashlib
  3. import json
  4. import shutil
  5. import tempfile
  6. from enum import Enum
  7. from os import path
  8. from django.core.cache import cache
  9. from apps.organizations_ext.models import Organization
  10. from apps.releases.models import Release, ReleaseFile
  11. from sentry.utils.zip import safe_extract_zip
  12. from .exceptions import AssembleArtifactsError, AssembleChecksumMismatch
  13. from .models import File, FileBlob
# 2 GB -- the maximum offset supported by FileBlob, so also the largest
# file that can be assembled from chunks.
MAX_FILE_SIZE = 2**31
class ChunkFileState(Enum):
    """Lifecycle states reported for a chunk-uploaded file being assembled."""

    OK = "ok"  # File in database
    NOT_FOUND = "not_found"  # File not found in database
    CREATED = "created"  # File was created in the request and sent to the worker for assembling
    ASSEMBLING = "assembling"  # File still being processed by worker
    ERROR = "error"  # Error happened during assembling
class AssembleTask(Enum):
    """Kinds of assemble jobs; the value namespaces the status cache key."""

    DIF = "project.dsym"  # Debug file upload
    ARTIFACTS = "organization.artifacts"  # Release file upload
  24. def _get_cache_key(task, scope, checksum):
  25. """Computes the cache key for assemble status.
  26. ``task`` must be one of the ``AssembleTask`` values. The scope can be the
  27. identifier of any model, such as the organization or project that this task
  28. is performed under.
  29. ``checksum`` should be the SHA1 hash of the main file that is being
  30. assembled.
  31. """
  32. return (
  33. "assemble-status:%s"
  34. % hashlib.sha1(
  35. ("%s|%s|%s" % (scope, checksum.encode("ascii"), task)).encode()
  36. ).hexdigest()
  37. )
  38. def set_assemble_status(
  39. task: AssembleTask, scope, checksum, state: ChunkFileState, detail=None
  40. ):
  41. """
  42. Updates the status of an assembling task. It is cached for 10 minutes.
  43. """
  44. cache_key = _get_cache_key(task, scope, checksum)
  45. cache.set(cache_key, (state, detail), 600)
  46. def assemble_artifacts(organization, version, checksum, chunks):
  47. set_assemble_status(
  48. AssembleTask.ARTIFACTS, organization.pk, checksum, ChunkFileState.ASSEMBLING
  49. )
  50. # Assemble the chunks into a temporary file
  51. rv = assemble_file(
  52. AssembleTask.ARTIFACTS,
  53. organization,
  54. "release-artifacts.zip",
  55. checksum,
  56. chunks,
  57. file_type="release.bundle",
  58. )
  59. if rv is None:
  60. return
  61. bundle, temp_file = rv
  62. scratchpad = tempfile.mkdtemp()
  63. try:
  64. safe_extract_zip(temp_file, scratchpad, strip_toplevel=False)
  65. except BaseException as ex:
  66. raise AssembleArtifactsError("failed to extract bundle") from ex
  67. try:
  68. manifest_path = path.join(scratchpad, "manifest.json")
  69. with open(manifest_path, "rb") as manifest:
  70. manifest = json.loads(manifest.read())
  71. except BaseException as ex:
  72. raise AssembleArtifactsError("failed to open release manifest") from ex
  73. if organization.slug != manifest.get("org"):
  74. raise AssembleArtifactsError("organization does not match uploaded bundle")
  75. release_name = manifest.get("release")
  76. if release_name != version:
  77. raise AssembleArtifactsError("release does not match uploaded bundle")
  78. try:
  79. release = organization.release_set.get(version=release_name)
  80. except Release.DoesNotExist as ex:
  81. raise AssembleArtifactsError("release does not exist") from ex
  82. # Sentry would add dist to release here
  83. artifacts = manifest.get("files", {})
  84. for rel_path, artifact in artifacts.items():
  85. artifact_url = artifact.get("url", rel_path)
  86. artifact_basename = artifact_url.rsplit("/", 1)[-1]
  87. file = File.objects.create(
  88. name=artifact_basename,
  89. type="release.file",
  90. headers=artifact.get("headers", {}),
  91. )
  92. full_path = path.join(scratchpad, rel_path)
  93. with open(full_path, "rb") as fp:
  94. file.putfile(fp)
  95. # kwargs = {
  96. # "organization_id": organization.id,
  97. # "release": release,
  98. # "name": artifact_url,
  99. # # "dist": dist,
  100. # }
  101. release_file, created = ReleaseFile.objects.get_or_create(
  102. release=release, name=artifact_url, defaults={"file": file}
  103. )
  104. if not created:
  105. old_file = release_file.file
  106. release_file.file = file
  107. release_file.save(update_fields=["file"])
  108. old_file.delete()
  109. set_assemble_status(
  110. AssembleTask.ARTIFACTS, organization.pk, checksum, ChunkFileState.OK
  111. )
  112. shutil.rmtree(scratchpad)
  113. bundle.delete()
  114. def assemble_file(
  115. task: AssembleTask,
  116. organization: Organization,
  117. name: str,
  118. checksum,
  119. chunks,
  120. file_type,
  121. ):
  122. """
  123. Verifies and assembles a file model from chunks.
  124. This downloads all chunks from blob store to verify their integrity and
  125. associates them with a created file model. Additionally, it assembles the
  126. full file in a temporary location and verifies the complete content hash.
  127. Returns a tuple ``(File, TempFile)`` on success, or ``None`` on error.
  128. """
  129. # Load all FileBlobs from db since we can be sure here we already own all
  130. # chunks need to build the file
  131. file_blobs = FileBlob.objects.filter(checksum__in=chunks).values_list(
  132. "id", "checksum", "size"
  133. )
  134. # Reject all files that exceed the maximum allowed size for this
  135. # organization. This value cannot be
  136. file_size = sum(x[2] for x in file_blobs)
  137. if file_size > MAX_FILE_SIZE:
  138. set_assemble_status(
  139. task,
  140. organization.id,
  141. checksum,
  142. ChunkFileState.ERROR,
  143. detail="File exceeds maximum size",
  144. )
  145. return
  146. # Sanity check. In case not all blobs exist at this point we have a
  147. # race condition.
  148. if set(x[1] for x in file_blobs) != set(chunks):
  149. set_assemble_status(
  150. task,
  151. organization.id,
  152. checksum,
  153. ChunkFileState.ERROR,
  154. detail="Not all chunks available for assembling",
  155. )
  156. return
  157. # Ensure blobs are in the order and duplication in which they were
  158. # transmitted. Otherwise, we would assemble the file in the wrong order.
  159. ids_by_checksum = {chks: id for id, chks, _ in file_blobs}
  160. file_blob_ids = [ids_by_checksum[c] for c in chunks]
  161. file = File.objects.create(name=name, checksum=checksum, type=file_type)
  162. try:
  163. temp_file = file.assemble_from_file_blob_ids(file_blob_ids, checksum)
  164. except AssembleChecksumMismatch:
  165. file.delete()
  166. set_assemble_status(
  167. task,
  168. organization.id,
  169. checksum,
  170. ChunkFileState.ERROR,
  171. detail="Reported checksum mismatch",
  172. )
  173. else:
  174. file.save()
  175. return file, temp_file