7 years ago · a612fd6d14
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,8 @@
 
				 Version 8.21 (Unreleased)
			
 
				 -------------------------
			
 
				 
			
 
				+- Ignore querystrings when looking up release artifacts
			
 
				+
			
 
				 Version 8.20
			
 
				 ------------
			
 
				 - Make BitBucket repositories enabled by default
			
--- a/src/sentry/lang/javascript/processor.py
+++ b/src/sentry/lang/javascript/processor.py
@@ -11,7 +11,7 @@ import zlib
 
				 from django.conf import settings
			
 
				 from os.path import splitext
			
 
				 from requests.utils import get_encoding_from_headers
			
 
				-from six.moves.urllib.parse import urlparse, urljoin, urlsplit
			
 
				+from six.moves.urllib.parse import urljoin, urlsplit
			
 
				 from libsourcemap import from_json as view_from_json
			
 
				 
			
 
				 # In case SSL is unavailable (light builds) we can't import this here.
			
@@ -197,29 +197,19 @@ def discover_sourcemap(result):
 
				 def fetch_release_file(filename, release, dist=None):
			
 
				     cache_key = 'releasefile:v1:%s:%s' % (release.id, md5_text(filename).hexdigest(), )
			
 
				 
			
 
				-    filename_path = None
			
 
				-    if filename is not None:
			
 
				-        # Reconstruct url without protocol + host
			
 
				-        # e.g. http://example.com/foo?bar => ~/foo?bar
			
 
				-        parsed_url = urlparse(filename)
			
 
				-        filename_path = '~' + parsed_url.path
			
 
				-        if parsed_url.query:
			
 
				-            filename_path += '?' + parsed_url.query
			
 
				-
			
 
				     logger.debug('Checking cache for release artifact %r (release_id=%s)', filename, release.id)
			
 
				     result = cache.get(cache_key)
			
 
				 
			
 
				     dist_name = dist and dist.name or None
			
 
				 
			
 
				     if result is None:
			
 
				+        filename_choices = ReleaseFile.normalize(filename)
			
 
				+        filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]
			
 
				+
			
 
				         logger.debug(
			
 
				             'Checking database for release artifact %r (release_id=%s)', filename, release.id
			
 
				         )
			
 
				 
			
 
				-        filename_idents = [ReleaseFile.get_ident(filename, dist_name)]
			
 
				-        if filename_path is not None and filename_path != filename:
			
 
				-            filename_idents.append(ReleaseFile.get_ident(filename_path, dist_name))
			
 
				-
			
 
				         possible_files = list(
			
 
				             ReleaseFile.objects.filter(
			
 
				                 release=release,
			
@@ -237,10 +227,15 @@ def fetch_release_file(filename, release, dist=None):
 
				         elif len(possible_files) == 1:
			
 
				             releasefile = possible_files[0]
			
 
				         else:
			
 
				-            # Prioritize releasefile that matches full url (w/ host)
			
 
				-            # over hostless releasefile
			
 
				-            target_ident = filename_idents[0]
			
 
				-            releasefile = next((f for f in possible_files if f.ident == target_ident))
			
 
				+            # Pick first one that matches in priority order.
			
 
				+            # This is O(N*M) but there are only ever at most 4 things here
			
 
				+            # so not really worth optimizing.
			
 
				+            releasefile = next((
			
 
				+                rf
			
 
				+                for ident in filename_idents
			
 
				+                for rf in possible_files
			
 
				+                if rf.ident == ident
			
 
				+            ))
			
 
				 
			
 
				         logger.debug(
			
 
				             'Found release artifact %r (id=%s, release_id=%s)', filename, releasefile.id, release.id
			
--- a/src/sentry/models/releasefile.py
+++ b/src/sentry/models/releasefile.py
@@ -9,6 +9,7 @@ sentry.models.releasefile
 
				 from __future__ import absolute_import
			
 
				 
			
 
				 from django.db import models
			
 
				+from six.moves.urllib.parse import urlsplit, urlunsplit
			
 
				 
			
 
				 from sentry.db.models import BoundedPositiveIntegerField, FlexibleForeignKey, Model, sane_repr
			
 
				 from sentry.utils.hashlib import sha1_text
			
@@ -59,3 +60,25 @@ class ReleaseFile(Model):
 
				         if dist is not None:
			
 
				             return sha1_text(name + '\x00\x00' + dist).hexdigest()
			
 
				         return sha1_text(name).hexdigest()
			
 
				+
			
 
				+    @classmethod
			
 
				+    def normalize(cls, url):
			
 
				+        """Transforms a full absolute url into 2 or 4 generalized options
			
 
				+
			
 
				+        * the original url as input
			
 
				+        * (optional) original url without querystring
			
 
				+        * the full url, but stripped of scheme and netloc
			
 
				+        * (optional) full url without scheme and netloc or querystring
			
 
				+        """
			
 
				+        # Always ignore the fragment
			
 
				+        scheme, netloc, path, query, _ = urlsplit(url)
			
 
				+        uri_relative = (None, None, path, query, None)
			
 
				+        uri_without_query = (scheme, netloc, path, None, None)
			
 
				+        uri_relative_without_query = (None, None, path, None, None)
			
 
				+        urls = [url]
			
 
				+        if query:
			
 
				+            urls.append(urlunsplit(uri_without_query))
			
 
				+        urls.append('~' + urlunsplit(uri_relative))
			
 
				+        if query:
			
 
				+            urls.append('~' + urlunsplit(uri_relative_without_query))
			
 
				+        return urls
			
--- a/tests/sentry/lang/javascript/test_processor.py
+++ b/tests/sentry/lang/javascript/test_processor.py
@@ -130,6 +130,41 @@ class FetchReleaseFileTest(TestCase):
 
				 
			
 
				         assert result == new_result
			
 
				 
			
 
				+    def test_fallbacks(self):
			
 
				+        project = self.project
			
 
				+        release = Release.objects.create(
			
 
				+            organization_id=project.organization_id,
			
 
				+            version='abc',
			
 
				+        )
			
 
				+        release.add_project(project)
			
 
				+
			
 
				+        file = File.objects.create(
			
 
				+            name='~/file.min.js',
			
 
				+            type='release.file',
			
 
				+            headers={'Content-Type': 'application/json; charset=utf-8'},
			
 
				+        )
			
 
				+
			
 
				+        binary_body = unicode_body.encode('utf-8')
			
 
				+        file.putfile(six.BytesIO(binary_body))
			
 
				+
			
 
				+        ReleaseFile.objects.create(
			
 
				+            name='~/file.min.js',
			
 
				+            release=release,
			
 
				+            organization_id=project.organization_id,
			
 
				+            file=file,
			
 
				+        )
			
 
				+
			
 
				+        result = fetch_release_file('http://example.com/file.min.js?lol', release)
			
 
				+
			
 
				+        assert type(result.body) is six.binary_type
			
 
				+        assert result == http.UrlResult(
			
 
				+            'http://example.com/file.min.js?lol',
			
 
				+            {'content-type': 'application/json; charset=utf-8'},
			
 
				+            binary_body,
			
 
				+            200,
			
 
				+            'utf-8',
			
 
				+        )
			
 
				+
			
 
				 
			
 
				 class FetchFileTest(TestCase):
			
 
				     @responses.activate
			
--- a/tests/sentry/models/test_releasefile.py
+++ b/tests/sentry/models/test_releasefile.py
@@ -0,0 +1,36 @@
 
				+from __future__ import absolute_import
			
 
				+
			
 
				+from sentry.models import ReleaseFile
			
 
				+from sentry.testutils import TestCase
			
 
				+
			
 
				+
			
 
				+class ReleaseFileTestCase(TestCase):
			
 
				+    def test_normalize(self):
			
 
				+        n = ReleaseFile.normalize
			
 
				+
			
 
				+        assert n('http://example.com') == [
			
 
				+            'http://example.com',
			
 
				+            '~',
			
 
				+        ]
			
 
				+        assert n('http://example.com/foo.js') == [
			
 
				+            'http://example.com/foo.js',
			
 
				+            '~/foo.js',
			
 
				+        ]
			
 
				+        assert n('http://example.com/foo.js?bar') == [
			
 
				+            'http://example.com/foo.js?bar',
			
 
				+            'http://example.com/foo.js',
			
 
				+            '~/foo.js?bar',
			
 
				+            '~/foo.js',
			
 
				+        ]
			
 
				+        assert n('/foo.js') == [
			
 
				+            '/foo.js',
			
 
				+            '~/foo.js',
			
 
				+        ]
			
 
				+
			
 
				+        # This is the current behavior, but seems weird to me.
			
 
				+        # unclear if we actually experience this case in the real
			
 
				+        # world, but worth documenting the behavior
			
 
				+        assert n('foo.js') == [
			
 
				+            'foo.js',
			
 
				+            '~foo.js',
			
 
				+        ]