test_project_artifact_lookup.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484
  1. import zipfile
  2. from datetime import datetime, timezone
  3. from io import BytesIO
  4. from uuid import uuid4
  5. import pytest
  6. from django.urls import reverse
  7. from sentry.models import ArtifactBundle, DebugIdArtifactBundle, File, ReleaseFile, SourceFileType
  8. from sentry.models.artifactbundle import ReleaseArtifactBundle
  9. from sentry.models.releasefile import read_artifact_index, update_artifact_index
  10. from sentry.testutils import APITestCase
  11. from sentry.utils import json
  12. def make_file(artifact_name, content, type="artifact.bundle", headers=None):
  13. file = File.objects.create(name=artifact_name, type=type, headers=(headers or {}))
  14. file.putfile(BytesIO(content))
  15. return file
  16. def make_compressed_zip_file(artifact_name, files):
  17. def remove_and_return(dictionary, key):
  18. dictionary.pop(key)
  19. return dictionary
  20. compressed = BytesIO()
  21. with zipfile.ZipFile(compressed, mode="w") as zip_file:
  22. for file_path, info in files.items():
  23. zip_file.writestr(file_path, bytes(info["content"]))
  24. zip_file.writestr(
  25. "manifest.json",
  26. json.dumps(
  27. {
  28. # We remove the "content" key in the original dict, thus no subsequent calls should be made.
  29. "files": {
  30. file_path: remove_and_return(info, "content")
  31. for file_path, info in files.items()
  32. }
  33. }
  34. ),
  35. )
  36. compressed.seek(0)
  37. file = File.objects.create(name=artifact_name, type="artifact.bundle")
  38. file.putfile(compressed)
  39. return file
  40. class ArtifactLookupTest(APITestCase):
  41. def assert_download_matches_file(self, url: str, file: File):
  42. response = self.client.get(url)
  43. with file.getfile() as file:
  44. for chunk in response:
  45. assert file.read(len(chunk)) == chunk
  46. def create_archive(self, fields, files, dist=None):
  47. manifest = dict(
  48. fields, files={filename: {"url": f"fake://{filename}"} for filename in files}
  49. )
  50. buffer = BytesIO()
  51. with zipfile.ZipFile(buffer, mode="w") as zf:
  52. zf.writestr("manifest.json", json.dumps(manifest))
  53. for filename, content in files.items():
  54. zf.writestr(filename, content)
  55. buffer.seek(0)
  56. file_ = File.objects.create(name=str(hash(tuple(files.items()))))
  57. file_.putfile(buffer)
  58. file_.update(timestamp=datetime(2021, 6, 11, 9, 13, 1, 317902, tzinfo=timezone.utc))
  59. return (update_artifact_index(self.release, dist, file_), file_)
  60. def test_query_by_debug_ids(self):
  61. debug_id_a = "aaaaaaaa-0000-0000-0000-000000000000"
  62. debug_id_b = "bbbbbbbb-0000-0000-0000-000000000000"
  63. file_ab = make_file("bundle_ab.zip", b"ab")
  64. bundle_id_ab = uuid4()
  65. artifact_bundle_ab = ArtifactBundle.objects.create(
  66. organization_id=self.organization.id,
  67. bundle_id=bundle_id_ab,
  68. file=file_ab,
  69. artifact_count=2,
  70. )
  71. DebugIdArtifactBundle.objects.create(
  72. organization_id=self.organization.id,
  73. debug_id=debug_id_a,
  74. artifact_bundle=artifact_bundle_ab,
  75. source_file_type=SourceFileType.SOURCE_MAP.value,
  76. )
  77. DebugIdArtifactBundle.objects.create(
  78. organization_id=self.organization.id,
  79. debug_id=debug_id_b,
  80. artifact_bundle=artifact_bundle_ab,
  81. source_file_type=SourceFileType.SOURCE_MAP.value,
  82. )
  83. debug_id_c = "cccccccc-0000-0000-0000-000000000000"
  84. file_c = make_file("bundle_c.zip", b"c")
  85. bundle_id_c = uuid4()
  86. artifact_bundle_c = ArtifactBundle.objects.create(
  87. organization_id=self.organization.id,
  88. bundle_id=bundle_id_c,
  89. file=file_c,
  90. artifact_count=1,
  91. )
  92. DebugIdArtifactBundle.objects.create(
  93. organization_id=self.organization.id,
  94. debug_id=debug_id_c,
  95. artifact_bundle=artifact_bundle_c,
  96. source_file_type=SourceFileType.SOURCE_MAP.value,
  97. )
  98. self.login_as(user=self.user)
  99. url = reverse(
  100. "sentry-api-0-project-artifact-lookup",
  101. kwargs={
  102. "organization_slug": self.project.organization.slug,
  103. "project_slug": self.project.slug,
  104. },
  105. )
  106. # query by one debug-id
  107. response = self.client.get(f"{url}?debug_id={debug_id_a}").json()
  108. assert len(response) == 1
  109. assert response[0]["type"] == "bundle"
  110. self.assert_download_matches_file(response[0]["url"], file_ab)
  111. # query by two debug-ids pointing to the same bundle
  112. response = self.client.get(f"{url}?debug_id={debug_id_a}&debug_id={debug_id_b}").json()
  113. assert len(response) == 1
  114. assert response[0]["type"] == "bundle"
  115. self.assert_download_matches_file(response[0]["url"], file_ab)
  116. # query by two debug-ids pointing to different bundles
  117. response = self.client.get(f"{url}?debug_id={debug_id_a}&debug_id={debug_id_c}").json()
  118. assert len(response) == 2
  119. assert response[0]["type"] == "bundle"
  120. self.assert_download_matches_file(response[0]["url"], file_ab)
  121. assert response[1]["type"] == "bundle"
  122. self.assert_download_matches_file(response[1]["url"], file_c)
  123. def test_query_by_url(self):
  124. debug_id_a = "aaaaaaaa-0000-0000-0000-000000000000"
  125. file_a = make_compressed_zip_file(
  126. "bundle_a.zip",
  127. {
  128. "path/in/zip": {
  129. "url": "~/path/to/app.js",
  130. "type": "source_map",
  131. "content": b"foo",
  132. "headers": {
  133. "debug-id": debug_id_a,
  134. },
  135. },
  136. },
  137. )
  138. file_b = make_compressed_zip_file(
  139. "bundle_b.zip",
  140. {
  141. "path/in/zip_a": {
  142. "url": "~/path/to/app.js",
  143. "type": "source_map",
  144. "content": b"foo",
  145. },
  146. "path/in/zip_b": {
  147. "url": "~/path/to/other/app.js",
  148. "type": "source_map",
  149. "content": b"bar",
  150. },
  151. },
  152. )
  153. artifact_bundle_a = ArtifactBundle.objects.create(
  154. organization_id=self.organization.id, bundle_id=uuid4(), file=file_a, artifact_count=1
  155. )
  156. DebugIdArtifactBundle.objects.create(
  157. organization_id=self.organization.id,
  158. debug_id=debug_id_a,
  159. artifact_bundle=artifact_bundle_a,
  160. source_file_type=SourceFileType.SOURCE_MAP.value,
  161. )
  162. artifact_bundle_b = ArtifactBundle.objects.create(
  163. organization_id=self.organization.id, bundle_id=uuid4(), file=file_b, artifact_count=2
  164. )
  165. dist = self.release.add_dist("whatever")
  166. ReleaseArtifactBundle.objects.create(
  167. organization_id=self.organization.id,
  168. release_name=self.release.version,
  169. dist_name=dist.name,
  170. artifact_bundle=artifact_bundle_a,
  171. )
  172. ReleaseArtifactBundle.objects.create(
  173. organization_id=self.organization.id,
  174. release_name=self.release.version,
  175. dist_name=dist.name,
  176. artifact_bundle=artifact_bundle_b,
  177. )
  178. self.login_as(user=self.user)
  179. url = reverse(
  180. "sentry-api-0-project-artifact-lookup",
  181. kwargs={
  182. "organization_slug": self.project.organization.slug,
  183. "project_slug": self.project.slug,
  184. },
  185. )
  186. # query by url that is in both files, we only want to get one though
  187. response = self.client.get(
  188. f"{url}?release={self.release.version}&dist={dist.name}&url=path/to/app"
  189. ).json()
  190. assert len(response) == 1
  191. assert response[0]["type"] == "bundle"
  192. self.assert_download_matches_file(response[0]["url"], file_a)
  193. # query by two urls yielding two bundles
  194. response = self.client.get(
  195. f"{url}?release={self.release.version}&dist={dist.name}&url=path/to/app&url=path/to/other/app"
  196. ).json()
  197. assert len(response) == 2
  198. assert response[0]["type"] == "bundle"
  199. self.assert_download_matches_file(response[0]["url"], file_a)
  200. assert response[1]["type"] == "bundle"
  201. self.assert_download_matches_file(response[1]["url"], file_b)
  202. # query by both debug-id and url with overlapping bundles
  203. response = self.client.get(
  204. f"{url}?release={self.release.version}&dist={dist.name}&debug_id={debug_id_a}&url=path/to/app"
  205. ).json()
  206. assert len(response) == 1
  207. assert response[0]["type"] == "bundle"
  208. self.assert_download_matches_file(response[0]["url"], file_a)
  209. # query by both debug-id and url
  210. response = self.client.get(
  211. f"{url}?release={self.release.version}&dist={dist.name}&debug_id={debug_id_a}&url=path/to/other/app"
  212. ).json()
  213. assert len(response) == 2
  214. assert response[0]["type"] == "bundle"
  215. self.assert_download_matches_file(response[0]["url"], file_a)
  216. assert response[1]["type"] == "bundle"
  217. self.assert_download_matches_file(response[1]["url"], file_b)
  218. def test_query_by_url_from_releasefiles(self):
  219. file_headers = {"Sourcemap": "application.js.map"}
  220. file = make_file("application.js", b"wat", "release.file", file_headers)
  221. ReleaseFile.objects.create(
  222. organization_id=self.project.organization_id,
  223. release_id=self.release.id,
  224. file=file,
  225. name="http://example.com/application.js",
  226. )
  227. self.login_as(user=self.user)
  228. url = reverse(
  229. "sentry-api-0-project-artifact-lookup",
  230. kwargs={
  231. "organization_slug": self.project.organization.slug,
  232. "project_slug": self.project.slug,
  233. },
  234. )
  235. response = self.client.get(
  236. f"{url}?release={self.release.version}&url=application.js"
  237. ).json()
  238. assert len(response) == 1
  239. assert response[0]["type"] == "file"
  240. assert response[0]["abs_path"] == "http://example.com/application.js"
  241. assert response[0]["headers"] == file_headers
  242. self.assert_download_matches_file(response[0]["url"], file)
  243. @pytest.mark.skip(
  244. reason="flakey: https://sentry.sentry.io/issues/4024152695/?cursor=0%3A200%3A0&project=2423079"
  245. )
  246. def test_query_by_url_from_artifact_index(self):
  247. self.login_as(user=self.user)
  248. url = reverse(
  249. "sentry-api-0-project-artifact-lookup",
  250. kwargs={
  251. "organization_slug": self.project.organization.slug,
  252. "project_slug": self.project.slug,
  253. },
  254. )
  255. assert read_artifact_index(self.release, None) is None
  256. archive1, archive1_file = self.create_archive(
  257. fields={},
  258. files={
  259. "foo": "foo",
  260. "bar": "bar",
  261. },
  262. )
  263. assert read_artifact_index(self.release, None) == {
  264. "files": {
  265. "fake://foo": {
  266. "archive_ident": archive1.ident,
  267. "date_created": "2021-06-11T09:13:01.317902Z",
  268. "filename": "foo",
  269. "sha1": "0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33",
  270. "size": 3,
  271. },
  272. "fake://bar": {
  273. "archive_ident": archive1.ident,
  274. "date_created": "2021-06-11T09:13:01.317902Z",
  275. "filename": "bar",
  276. "sha1": "62cdb7020ff920e5aa642c3d4066950dd1f01f4d",
  277. "size": 3,
  278. },
  279. },
  280. }
  281. # Should download 1 archives as both files are within a single archive
  282. response = self.client.get(f"{url}?release={self.release.version}&url=foo&url=bar").json()
  283. assert len(response) == 1
  284. assert response[0]["type"] == "bundle"
  285. self.assert_download_matches_file(response[0]["url"], archive1_file)
  286. # Override `bar` file inside the index. It will now have different `sha1`` and different `archive_ident` as it comes from other archive.
  287. archive2, archive2_file = self.create_archive(
  288. fields={},
  289. files={
  290. "bar": "BAR",
  291. },
  292. )
  293. assert read_artifact_index(self.release, None) == {
  294. "files": {
  295. "fake://foo": {
  296. "archive_ident": archive1.ident,
  297. "date_created": "2021-06-11T09:13:01.317902Z",
  298. "filename": "foo",
  299. "sha1": "0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33",
  300. "size": 3,
  301. },
  302. "fake://bar": {
  303. "archive_ident": archive2.ident,
  304. "date_created": "2021-06-11T09:13:01.317902Z",
  305. "filename": "bar",
  306. "sha1": "a5d5c1bba91fdb6c669e1ae0413820885bbfc455",
  307. "size": 3,
  308. },
  309. },
  310. }
  311. response = self.client.get(f"{url}?release={self.release.version}&url=foo").json()
  312. assert len(response) == 1
  313. assert response[0]["type"] == "bundle"
  314. self.assert_download_matches_file(response[0]["url"], archive1_file)
  315. # Should download 2 archives as they have different `archive_ident`
  316. response = self.client.get(f"{url}?release={self.release.version}&url=foo&url=bar").json()
  317. assert len(response) == 2
  318. assert response[0]["type"] == "bundle"
  319. self.assert_download_matches_file(response[0]["url"], archive1_file)
  320. assert response[1]["type"] == "bundle"
  321. self.assert_download_matches_file(response[1]["url"], archive2_file)
  322. def test_query_by_url_and_dist_from_artifact_index(self):
  323. self.login_as(user=self.user)
  324. url = reverse(
  325. "sentry-api-0-project-artifact-lookup",
  326. kwargs={
  327. "organization_slug": self.project.organization.slug,
  328. "project_slug": self.project.slug,
  329. },
  330. )
  331. dist = self.release.add_dist("foo")
  332. archive1, archive1_file = self.create_archive(
  333. fields={},
  334. files={
  335. "foo": "foo",
  336. "bar": "bar",
  337. },
  338. dist=dist,
  339. )
  340. # No index for dist-less requests.
  341. assert read_artifact_index(self.release, None) is None
  342. assert read_artifact_index(self.release, dist) == {
  343. "files": {
  344. "fake://foo": {
  345. "archive_ident": archive1.ident,
  346. "date_created": "2021-06-11T09:13:01.317902Z",
  347. "filename": "foo",
  348. "sha1": "0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33",
  349. "size": 3,
  350. },
  351. "fake://bar": {
  352. "archive_ident": archive1.ident,
  353. "date_created": "2021-06-11T09:13:01.317902Z",
  354. "filename": "bar",
  355. "sha1": "62cdb7020ff920e5aa642c3d4066950dd1f01f4d",
  356. "size": 3,
  357. },
  358. },
  359. }
  360. # Should download 1 archives as both files are within a single archive
  361. response = self.client.get(
  362. f"{url}?release={self.release.version}&url=foo&url=bar&dist=foo"
  363. ).json()
  364. assert len(response) == 1
  365. assert response[0]["type"] == "bundle"
  366. self.assert_download_matches_file(response[0]["url"], archive1_file)
  367. # Override `bar` file inside the index. It will now have different `sha1`` and different `archive_ident` as it comes from other archive.
  368. archive2, archive2_file = self.create_archive(
  369. fields={},
  370. files={
  371. "bar": "BAR",
  372. },
  373. dist=dist,
  374. )
  375. assert read_artifact_index(self.release, dist) == {
  376. "files": {
  377. "fake://foo": {
  378. "archive_ident": archive1.ident,
  379. "date_created": "2021-06-11T09:13:01.317902Z",
  380. "filename": "foo",
  381. "sha1": "0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33",
  382. "size": 3,
  383. },
  384. "fake://bar": {
  385. "archive_ident": archive2.ident,
  386. "date_created": "2021-06-11T09:13:01.317902Z",
  387. "filename": "bar",
  388. "sha1": "a5d5c1bba91fdb6c669e1ae0413820885bbfc455",
  389. "size": 3,
  390. },
  391. },
  392. }
  393. response = self.client.get(
  394. f"{url}?release={self.release.version}&url=foo&dist={dist.name}"
  395. ).json()
  396. assert len(response) == 1
  397. assert response[0]["type"] == "bundle"
  398. self.assert_download_matches_file(response[0]["url"], archive1_file)
  399. # Should download 2 archives as they have different `archive_ident`
  400. response = self.client.get(
  401. f"{url}?release={self.release.version}&url=foo&url=bar&dist={dist.name}"
  402. ).json()
  403. assert len(response) == 2
  404. assert response[0]["type"] == "bundle"
  405. self.assert_download_matches_file(response[0]["url"], archive1_file)
  406. assert response[1]["type"] == "bundle"
  407. self.assert_download_matches_file(response[1]["url"], archive2_file)