assemble.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. """Partial port of sentry/tasks/assemble.py"""
  2. import hashlib
  3. import json
  4. import shutil
  5. import tempfile
  6. from enum import Enum
  7. from os import path
  8. from django.core.cache import cache
  9. from apps.organizations_ext.models import Organization
  10. from apps.releases.models import Release
  11. from apps.sourcecode.models import DebugSymbolBundle
  12. from sentry.utils.zip import safe_extract_zip
  13. from .exceptions import AssembleArtifactsError, AssembleChecksumMismatch
  14. from .models import File, FileBlob
# Hard cap on an assembled file's total size: FileBlob addresses content with a
# signed 32-bit offset, so nothing past 2**31 bytes (2 GB) can be represented.
MAX_FILE_SIZE = 2**31 # 2GB is the maximum offset supported by fileblob
  16. class ChunkFileState(Enum):
  17. OK = "ok" # File in database
  18. NOT_FOUND = "not_found" # File not found in database
  19. CREATED = "created" # File was created in the request and send to the worker for assembling
  20. ASSEMBLING = "assembling" # File still being processed by worker
  21. ERROR = "error" # Error happened during assembling
  22. class AssembleTask(Enum):
  23. DIF = "project.dsym" # Debug file upload
  24. ARTIFACTS = "organization.artifacts" # Release file upload
  25. def _get_cache_key(task, scope, checksum):
  26. """Computes the cache key for assemble status.
  27. ``task`` must be one of the ``AssembleTask`` values. The scope can be the
  28. identifier of any model, such as the organization or project that this task
  29. is performed under.
  30. ``checksum`` should be the SHA1 hash of the main file that is being
  31. assembled.
  32. """
  33. return (
  34. "assemble-status:%s"
  35. % hashlib.sha1(
  36. ("%s|%s|%s" % (scope, checksum.encode("ascii"), task)).encode()
  37. ).hexdigest()
  38. )
  39. def set_assemble_status(
  40. task: AssembleTask, scope, checksum, state: ChunkFileState, detail=None
  41. ):
  42. """
  43. Updates the status of an assembling task. It is cached for 10 minutes.
  44. """
  45. cache_key = _get_cache_key(task, scope, checksum)
  46. cache.set(cache_key, (state, detail), 600)
  47. def assemble_artifacts(
  48. organization: Organization, version: str | None, checksum: str, chunks: list[str]
  49. ):
  50. set_assemble_status(
  51. AssembleTask.ARTIFACTS, organization.pk, checksum, ChunkFileState.ASSEMBLING
  52. )
  53. # Assemble the chunks into a temporary file
  54. rv = assemble_file(
  55. AssembleTask.ARTIFACTS,
  56. organization,
  57. "release-artifacts.zip",
  58. checksum,
  59. chunks,
  60. file_type="release.bundle",
  61. )
  62. if rv is None:
  63. return
  64. bundle, temp_file = rv
  65. scratchpad = tempfile.mkdtemp()
  66. try:
  67. safe_extract_zip(temp_file, scratchpad, strip_toplevel=False)
  68. except BaseException as ex:
  69. raise AssembleArtifactsError("failed to extract bundle") from ex
  70. try:
  71. manifest_path = path.join(scratchpad, "manifest.json")
  72. with open(manifest_path, "rb") as manifest:
  73. manifest = json.loads(manifest.read())
  74. except BaseException as ex:
  75. raise AssembleArtifactsError("failed to open release manifest") from ex
  76. if organization.slug != manifest.get("org"):
  77. raise AssembleArtifactsError("organization does not match uploaded bundle")
  78. release_name = manifest.get("release")
  79. if release_name != version:
  80. raise AssembleArtifactsError("release does not match uploaded bundle")
  81. release: Release | None = None
  82. if release_name:
  83. release, _ = Release.objects.get_or_create(
  84. organization=organization, version=release_name
  85. )
  86. # Sentry OSS would add dist to release here
  87. artifacts = manifest.get("files", {})
  88. files = []
  89. for rel_path, artifact in artifacts.items():
  90. artifact_url = artifact.get("url", rel_path)
  91. artifact_basename = artifact_url.rsplit("/", 1)[-1]
  92. headers = artifact.get("headers", {})
  93. file = File.objects.create(
  94. name=artifact_basename,
  95. type=artifact["type"],
  96. headers=headers,
  97. )
  98. files.append(file)
  99. full_path = path.join(scratchpad, rel_path)
  100. with open(full_path, "rb") as fp:
  101. file.putfile(fp)
  102. bundles: list[DebugSymbolBundle] = []
  103. for file in files:
  104. if file.type == "minified_source":
  105. try:
  106. sourcemap_file = next(
  107. value
  108. for value in files
  109. if value.type == "source_map"
  110. and (
  111. file.headers.get("sourcemap", file.headers.get("Sourcemap"))
  112. == value.name
  113. or (
  114. value.headers.get("debug-id")
  115. and value.headers.get("debug-id")
  116. == file.headers.get("debug-id")
  117. )
  118. )
  119. )
  120. except StopIteration:
  121. sourcemap_file = None
  122. if sourcemap_file:
  123. bundles.append(
  124. DebugSymbolBundle(
  125. organization=organization,
  126. debug_id=file.headers.get("debug-id"),
  127. release=release,
  128. sourcemap_file=sourcemap_file,
  129. file=file,
  130. )
  131. )
  132. DebugSymbolBundle.objects.bulk_create(
  133. bundles,
  134. ignore_conflicts=True,
  135. # unique_fields=["organization", "debug_id", "release"],
  136. # update_fields=["file", "sourcemap_file"],
  137. )
  138. # May need to readd this logic but in bulk
  139. # if not created:
  140. # old_file = release_file.file
  141. # release_file.file = file
  142. # release_file.save(update_fields=["file"])
  143. # old_file.delete()
  144. set_assemble_status(
  145. AssembleTask.ARTIFACTS, organization.pk, checksum, ChunkFileState.OK
  146. )
  147. shutil.rmtree(scratchpad)
  148. bundle.delete()
  149. def assemble_file(
  150. task: AssembleTask,
  151. organization: Organization,
  152. name: str,
  153. checksum,
  154. chunks,
  155. file_type,
  156. ):
  157. """
  158. Verifies and assembles a file model from chunks.
  159. This downloads all chunks from blob store to verify their integrity and
  160. associates them with a created file model. Additionally, it assembles the
  161. full file in a temporary location and verifies the complete content hash.
  162. Returns a tuple ``(File, TempFile)`` on success, or ``None`` on error.
  163. """
  164. # Load all FileBlobs from db since we can be sure here we already own all
  165. # chunks need to build the file
  166. file_blobs = FileBlob.objects.filter(checksum__in=chunks).values_list(
  167. "id", "checksum", "size"
  168. )
  169. # Reject all files that exceed the maximum allowed size for this
  170. # organization. This value cannot be
  171. file_size = sum(x[2] for x in file_blobs)
  172. if file_size > MAX_FILE_SIZE:
  173. set_assemble_status(
  174. task,
  175. organization.id,
  176. checksum,
  177. ChunkFileState.ERROR,
  178. detail="File exceeds maximum size",
  179. )
  180. return
  181. # Sanity check. In case not all blobs exist at this point we have a
  182. # race condition.
  183. if set(x[1] for x in file_blobs) != set(chunks):
  184. set_assemble_status(
  185. task,
  186. organization.id,
  187. checksum,
  188. ChunkFileState.ERROR,
  189. detail="Not all chunks available for assembling",
  190. )
  191. return
  192. # Ensure blobs are in the order and duplication in which they were
  193. # transmitted. Otherwise, we would assemble the file in the wrong order.
  194. ids_by_checksum = {chks: id for id, chks, _ in file_blobs}
  195. file_blob_ids = [ids_by_checksum[c] for c in chunks]
  196. file = File.objects.create(name=name, checksum=checksum, type=file_type)
  197. try:
  198. temp_file = file.assemble_from_file_blob_ids(file_blob_ids, checksum)
  199. except AssembleChecksumMismatch:
  200. file.delete()
  201. set_assemble_status(
  202. task,
  203. organization.id,
  204. checksum,
  205. ChunkFileState.ERROR,
  206. detail="Reported checksum mismatch",
  207. )
  208. else:
  209. file.save()
  210. return file, temp_file