Browse Source

feat: libsourcemap -> symbolic (#6365)

This commit changes handling in sentry from libsourcemap to symbolic
for JavaScript and proguard.
Armin Ronacher 7 years ago
parent
commit
adb72c23c9

+ 1 - 2
requirements-base.txt

@@ -19,7 +19,6 @@ hiredis>=0.1.0,<0.2.0
 honcho>=0.7.0,<0.8.0
 kombu==3.0.35
 ipaddress>=1.0.16,<1.1.0
-libsourcemap>=0.8.2,<0.9.0
 loremipsum>=1.0.5,<1.1.0
 lxml>=3.4.1
 mock>=0.8.0,<1.1
@@ -47,7 +46,7 @@ setproctitle>=1.1.7,<1.2.0
 statsd>=3.1.0,<3.2.0
 structlog==16.1.0
 sqlparse>=0.1.16,<0.2.0
-symbolic>=0.9.8,<1.0.0
+symbolic>=1.0.3,<2.0.0
 toronado>=0.0.11,<0.1.0
 ua-parser>=0.6.1,<0.8.0
 urllib3>=1.22,<1.23

+ 2 - 2
src/sentry/lang/java/plugin.py

@@ -3,7 +3,7 @@ from __future__ import absolute_import
 import six
 import uuid
 
-from libsourcemap import ProguardView
+from symbolic import ProguardMappingView
 from sentry.plugins import Plugin2
 from sentry.stacktraces import StacktraceProcessor
 from sentry.models import ProjectDSymFile, EventError
@@ -52,7 +52,7 @@ class JavaStacktraceProcessor(StacktraceProcessor):
             if dsym_path is None:
                 error_type = EventError.PROGUARD_MISSING_MAPPING
             else:
-                view = ProguardView.from_path(dsym_path)
+                view = ProguardMappingView.from_path(dsym_path)
                 if not view.has_line_info:
                     error_type = EventError.PROGUARD_MISSING_LINENO
                 else:

+ 26 - 41
src/sentry/lang/javascript/cache.py

@@ -1,10 +1,17 @@
 from __future__ import absolute_import, print_function
 
+from six import text_type
+from symbolic import SourceView
 from sentry.utils.strings import codec_lookup
 
 __all__ = ['SourceCache', 'SourceMapCache']
 
 
+def is_utf8(codec):
+    name = codec_lookup(codec).name
+    return name in ('utf-8', 'ascii')
+
+
 class SourceCache(object):
     def __init__(self):
         self._cache = {}
@@ -20,55 +27,33 @@ class SourceCache(object):
             url = self._aliases[url]
         return url
 
-    def get(self, url, raw=False):
-        url = self._get_canonical_url(url)
-        try:
-            parsed, rv = self._cache[url]
-        except KeyError:
-            return None
-
-        # We have already gotten this file and we've
-        # decoded the response, so just return
-        if parsed:
-            parsed, raw_body = rv
-            if raw:
-                return raw_body
-            return parsed
-
-        # Otherwise, we have a 2-tuple that needs to be applied
-        body, encoding = rv
-
-        # Our body is lazily evaluated if it
-        # comes from libsourcemap
-        if callable(body):
-            body = body()
-
-        raw_body = body
-        body = body.decode(codec_lookup(encoding, 'utf-8').name, 'replace').split(u'\n')
-
-        # Set back a marker to indicate we've parsed this url
-        self._cache[url] = (True, (body, raw_body))
-        return body
+    def get(self, url):
+        return self._cache.get(self._get_canonical_url(url))
 
     def get_errors(self, url):
         url = self._get_canonical_url(url)
         return self._errors.get(url, [])
 
-    def alias(self, u1, u2):
-        if u1 == u2:
-            return
-
-        if u1 in self._cache or u1 not in self._aliases:
-            self._aliases[u1] = u1
-        else:
-            self._aliases[u2] = u1
+    def alias(self, alias, target):
+        if alias != target:
+            self._aliases[alias] = target
 
     def add(self, url, source, encoding=None):
         url = self._get_canonical_url(url)
-        # Insert into the cache, an unparsed (source, encoding)
-        # tuple. This allows the source to be split and decoded
-        # on demand when first accessed.
-        self._cache[url] = (False, (source, encoding))
+
+        if not isinstance(source, SourceView):
+            if isinstance(source, text_type):
+                source = source.encode('utf-8')
+            # If an encoding is provided and it's not utf-8 compatible
+            # we try to re-encode the source and create a source view
+            # from it.
+            elif encoding is not None and not is_utf8(encoding):
+                try:
+                    source = source.decode(encoding).encode('utf-8')
+                except UnicodeError:
+                    pass
+            source = SourceView.from_bytes(source)
+        self._cache[url] = source
 
     def add_error(self, url, error):
         url = self._get_canonical_url(url)

+ 24 - 20
src/sentry/lang/javascript/processor.py

@@ -12,7 +12,7 @@ from django.conf import settings
 from os.path import splitext
 from requests.utils import get_encoding_from_headers
 from six.moves.urllib.parse import urljoin, urlsplit
-from libsourcemap import from_json as view_from_json
+from symbolic import SourceMapView
 
 # In case SSL is unavailable (light builds) we can't import this here.
 try:
@@ -391,7 +391,7 @@ def fetch_sourcemap(url, project=None, release=None, dist=None, allow_scraping=T
         )
         body = result.body
     try:
-        return view_from_json(body)
+        return SourceMapView.from_json_bytes(body)
     except Exception as exc:
         # This is in debug because the product shows an error already.
         logger.debug(six.text_type(exc), exc_info=True)
@@ -526,7 +526,7 @@ class JavaScriptStacktraceProcessor(StacktraceProcessor):
 
         # This might fail but that's okay, we try with a different path a
         # bit later down the road.
-        source = self.get_source(frame['abs_path'])
+        source = self.get_sourceview(frame['abs_path'])
 
         in_app = None
         new_frame = dict(frame)
@@ -549,11 +549,20 @@ class JavaScriptStacktraceProcessor(StacktraceProcessor):
 
             sourcemap_label = http.expose_url(sourcemap_label)
 
+            if frame.get('function'):
+                minified_function_name = frame['function']
+                minified_source = self.get_sourceview(frame['abs_path'])
+            else:
+                minified_function_name = minified_source = None
+
             try:
                 # Errors are 1-indexed in the frames, so we need to -1 to get
                 # zero-indexed value from tokens.
                 assert frame['lineno'] > 0, "line numbers are 1-indexed"
-                token = sourcemap_view.lookup_token(frame['lineno'] - 1, frame['colno'] - 1)
+                token = sourcemap_view.lookup(frame['lineno'] - 1,
+                                              frame['colno'] - 1,
+                                              minified_function_name,
+                                              minified_source)
             except Exception:
                 token = None
                 all_errors.append(
@@ -580,7 +589,7 @@ class JavaScriptStacktraceProcessor(StacktraceProcessor):
                 logger.debug(
                     'Mapping compressed source %r to mapping in %r', frame['abs_path'], abs_path
                 )
-                source = self.get_source(abs_path)
+                source = self.get_sourceview(abs_path)
 
             if not source:
                 errors = cache.get_errors(abs_path)
@@ -599,17 +608,12 @@ class JavaScriptStacktraceProcessor(StacktraceProcessor):
                 new_frame['lineno'] = token.src_line + 1
                 new_frame['colno'] = token.src_col + 1
 
-                # Find the original function name with a bit of guessing
-                original_function_name = None
+                # Try to use the function name we got from symbolic
+                original_function_name = token.function_name
 
                 # In the ideal case we can use the function name from the
                 # frame and the location to resolve the original name
                 # through the heuristics in our sourcemap library.
-                if frame.get('function'):
-                    minified_source = self.get_source(frame['abs_path'], raw=True)
-                    original_function_name = sourcemap_view.get_original_function_name(
-                        token.dst_line, token.dst_col, frame['function'],
-                        minified_source)
                 if original_function_name is None:
                     last_token = None
 
@@ -696,7 +700,7 @@ class JavaScriptStacktraceProcessor(StacktraceProcessor):
     def expand_frame(self, frame, source=None):
         if frame.get('lineno') is not None:
             if source is None:
-                source = self.get_source(frame['abs_path'])
+                source = self.get_sourceview(frame['abs_path'])
                 if source is None:
                     logger.debug('No source found for %s', frame['abs_path'])
                     return False
@@ -707,10 +711,10 @@ class JavaScriptStacktraceProcessor(StacktraceProcessor):
             return True
         return False
 
-    def get_source(self, filename, raw=False):
+    def get_sourceview(self, filename):
         if filename not in self.cache:
             self.cache_source(filename)
-        return self.cache.get(filename, raw=raw)
+        return self.cache.get(filename)
 
     def cache_source(self, filename):
         sourcemaps = self.sourcemaps
@@ -766,12 +770,12 @@ class JavaScriptStacktraceProcessor(StacktraceProcessor):
         sourcemaps.add(sourcemap_url, sourcemap_view)
 
         # cache any inlined sources
-        for src_id, source in sourcemap_view.iter_sources():
-            if sourcemap_view.has_source_contents(src_id):
+        for src_id, source_name in sourcemap_view.iter_sources():
+            source_view = sourcemap_view.get_sourceview(src_id)
+            if source_view is not None:
                 self.cache.add(
-                    urljoin(sourcemap_url, source),
-                    lambda view=sourcemap_view, id=src_id: view.get_source_contents(id),
-                    None,
+                    urljoin(sourcemap_url, source_name),
+                    source_view
                 )
 
     def populate_source_cache(self, frames):

+ 8 - 4
src/sentry/utils/strings.py

@@ -206,13 +206,17 @@ def codec_lookup(encoding, default='utf-8'):
     Note: the default value is not sanity checked and would
     bypass these checks."""
 
+    def _get_default():
+        if default is not None:
+            return codecs.lookup(default)
+
     if not encoding:
-        return codecs.lookup(default)
+        return _get_default()
 
     try:
         info = codecs.lookup(encoding)
     except (LookupError, TypeError):
-        return codecs.lookup(default)
+        return _get_default()
 
     try:
         # Check for `CodecInfo._is_text_encoding`.
@@ -221,13 +225,13 @@ def codec_lookup(encoding, default='utf-8'):
         # introduced into 2.7.12, so versions prior to this will
         # raise, but this is the best we can do.
         if not info._is_text_encoding:
-            return codecs.lookup(default)
+            return _get_default()
     except AttributeError:
         pass
 
     # `undefined` is a special encoding in python that 100% of
     # the time will raise, so ignore it.
     if info.name == 'undefined':
-        return codecs.lookup(default)
+        return _get_default()
 
     return info

+ 40 - 0
tests/sentry/lang/javascript/test_cache.py

@@ -0,0 +1,40 @@
+from __future__ import absolute_import
+
+from sentry.testutils import TestCase
+from sentry.lang.javascript.cache import SourceCache
+
+
+class BasicCacheTest(TestCase):
+    def test_basic_features(self):
+        cache = SourceCache()
+
+        url = 'http://example.com/foo.js'
+
+        assert url not in cache
+        assert cache.get(url) is None
+
+        cache.add(url, b'foo\nbar')
+        assert url in cache
+        assert cache.get(url) is not None
+        assert cache.get(url)[0] == u'foo'
+
+        cache.alias(url + 'x', url)
+        assert url + 'x' in cache
+        assert cache.get(url + 'x')[0] == u'foo'
+
+    def test_encoding_fallback(self):
+        cache = SourceCache()
+
+        url = 'http://example.com/foo.js'
+
+        # fall back to utf-8
+        cache.add(url, b'foobar', encoding='utf-32')
+        assert cache.get(url)[0] == u'foobar'
+
+    def test_encoding_support(self):
+        cache = SourceCache()
+        url = 'http://example.com/foo.js'
+
+        # valid utf-32 input is decoded with the given encoding
+        cache.add(url, 'foobar'.encode('utf-32'), encoding='utf-32')
+        assert cache.get(url)[0] == u'foobar'

+ 7 - 5
tests/sentry/lang/javascript/test_processor.py

@@ -5,7 +5,7 @@ from __future__ import absolute_import
 import pytest
 import responses
 import six
-from libsourcemap import Token
+from symbolic import SourceMapTokenMatch
 
 from mock import patch
 from requests.exceptions import RequestException
@@ -399,18 +399,20 @@ class GenerateModuleTest(TestCase):
 class FetchSourcemapTest(TestCase):
     def test_simple_base64(self):
         smap_view = fetch_sourcemap(base64_sourcemap)
-        tokens = [Token(1, 0, '/test.js', 0, 0, 0, None)]
+        tokens = [SourceMapTokenMatch(0, 0, 1, 0, src='/test.js', src_id=0)]
 
         assert list(smap_view) == tokens
-        assert smap_view.get_source_contents(0) == 'console.log("hello, World!")'
+        sv = smap_view.get_sourceview(0)
+        assert sv.get_source() == u'console.log("hello, World!")'
         assert smap_view.get_source_name(0) == u'/test.js'
 
     def test_base64_without_padding(self):
         smap_view = fetch_sourcemap(base64_sourcemap.rstrip('='))
-        tokens = [Token(1, 0, '/test.js', 0, 0, 0, None)]
+        tokens = [SourceMapTokenMatch(0, 0, 1, 0, src='/test.js', src_id=0)]
 
         assert list(smap_view) == tokens
-        assert smap_view.get_source_contents(0) == 'console.log("hello, World!")'
+        sv = smap_view.get_sourceview(0)
+        assert sv.get_source() == u'console.log("hello, World!")'
         assert smap_view.get_source_name(0) == u'/test.js'
 
     def test_broken_base64(self):

+ 67 - 67
tests/sentry/lang/javascript/test_sourcemaps.py

@@ -2,7 +2,7 @@
 
 from __future__ import absolute_import
 
-from libsourcemap import from_json as view_from_json, Token
+from symbolic import SourceMapView, SourceMapTokenMatch
 from sentry.testutils import TestCase
 
 from sentry.utils import json
@@ -66,10 +66,10 @@ indexed_sourcemap_example = json.dumps(
 
 class FindSourceTest(TestCase):
     def test_simple(self):
-        smap_view = view_from_json(sourcemap)
+        smap_view = SourceMapView.from_json_bytes(sourcemap)
 
-        result = smap_view.lookup_token(0, 56)
-        assert result == Token(
+        result = smap_view.lookup(0, 56)
+        assert result == SourceMapTokenMatch(
             dst_line=0,
             dst_col=50,
             src='foo/file2.js',
@@ -80,33 +80,33 @@ class FindSourceTest(TestCase):
         )
 
         # Start of minified file (exact match first line/col tuple)
-        result = smap_view.lookup_token(0, 0)
-        assert result == Token(
+        result = smap_view.lookup(0, 0)
+        assert result == SourceMapTokenMatch(
             dst_line=0, dst_col=0, src='foo/file1.js', src_line=0, src_col=0, src_id=0, name=None
         )
 
         # Last character in mapping
-        result = smap_view.lookup_token(0, 36)
-        assert result == Token(
+        result = smap_view.lookup(0, 36)
+        assert result == SourceMapTokenMatch(
             dst_line=0, dst_col=30, src='foo/file1.js', src_line=2, src_col=1, src_id=0, name=None
         )
 
         # First character in mapping (exact match line/col tuple)
-        result = smap_view.lookup_token(0, 37)
-        assert result == Token(
+        result = smap_view.lookup(0, 37)
+        assert result == SourceMapTokenMatch(
             dst_line=0, dst_col=37, src='foo/file1.js', src_line=2, src_col=8, src_id=0, name='a'
         )
 
         # End of minified file (character *beyond* last line/col tuple)
-        result = smap_view.lookup_token(0, 192)
-        assert result == Token(
+        result = smap_view.lookup(0, 192)
+        assert result == SourceMapTokenMatch(
             dst_line=0, dst_col=191, src='foo/file2.js', src_line=9, src_col=25, src_id=1, name='e'
         )
 
 
 class IterSourcesTest(TestCase):
     def test_basic(self):
-        smap_view = view_from_json(sourcemap)
+        smap_view = SourceMapView.from_json_bytes(sourcemap)
         assert list(smap_view.iter_sources()) == [
             (0, 'foo/file1.js'),
             (1, 'foo/file2.js'),
@@ -116,28 +116,28 @@ class IterSourcesTest(TestCase):
 class GetSourceContentsTest(TestCase):
     def test_no_inline(self):
         # basic sourcemap fixture has no inlined sources, so expect None
-        smap_view = view_from_json(sourcemap)
+        smap_view = SourceMapView.from_json_bytes(sourcemap)
 
-        source = smap_view.get_source_contents(0)
+        source = smap_view.get_sourceview(0)
         assert source is None
 
     def test_indexed_inline(self):
-        smap_view = view_from_json(indexed_sourcemap_example)
+        smap_view = SourceMapView.from_json_bytes(indexed_sourcemap_example)
 
-        assert smap_view.get_source_contents(0) == (
-            ' ONE.foo = function (bar) {\n' + '   return baz(bar);\n' + ' };'
+        assert smap_view.get_sourceview(0).get_source() == (
+            u' ONE.foo = function (bar) {\n' + '   return baz(bar);\n' + ' };'
         )
-        assert smap_view.get_source_contents(1) == (
-            ' TWO.inc = function (n) {\n' + '   return n + 1;\n' + ' };'
+        assert smap_view.get_sourceview(1).get_source() == (
+            u' TWO.inc = function (n) {\n' + '   return n + 1;\n' + ' };'
         )
 
 
 class ParseSourcemapTest(TestCase):
     def test_basic(self):
-        index = view_from_json(sourcemap)
+        index = SourceMapView.from_json_bytes(sourcemap)
 
         assert list(index) == [
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=0,
                 src='foo/file1.js',
@@ -146,7 +146,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=0,
                 name=None
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=8,
                 src='foo/file1.js',
@@ -155,7 +155,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=0,
                 name='add'
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=13,
                 src='foo/file1.js',
@@ -164,7 +164,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=0,
                 name='a'
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=15,
                 src='foo/file1.js',
@@ -173,7 +173,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=0,
                 name='b'
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=18,
                 src='foo/file1.js',
@@ -182,7 +182,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=0,
                 name=None
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=30,
                 src='foo/file1.js',
@@ -191,7 +191,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=0,
                 name=None
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=37,
                 src='foo/file1.js',
@@ -200,7 +200,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=0,
                 name='a'
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=40,
                 src='foo/file1.js',
@@ -209,7 +209,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=0,
                 name='b'
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=42,
                 src='foo/file2.js',
@@ -218,7 +218,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name=None
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=50,
                 src='foo/file2.js',
@@ -227,7 +227,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name='multiply'
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=60,
                 src='foo/file2.js',
@@ -236,7 +236,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name='a'
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=62,
                 src='foo/file2.js',
@@ -245,7 +245,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name='b'
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=65,
                 src='foo/file2.js',
@@ -254,7 +254,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name=None
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=77,
                 src='foo/file2.js',
@@ -263,7 +263,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name=None
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=84,
                 src='foo/file2.js',
@@ -272,7 +272,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name='a'
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=87,
                 src='foo/file2.js',
@@ -281,7 +281,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name='b'
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=89,
                 src='foo/file2.js',
@@ -290,7 +290,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name=None
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=97,
                 src='foo/file2.js',
@@ -299,7 +299,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name='divide'
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=105,
                 src='foo/file2.js',
@@ -308,7 +308,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name='a'
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=107,
                 src='foo/file2.js',
@@ -317,7 +317,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name='b'
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=110,
                 src='foo/file2.js',
@@ -326,7 +326,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name=None
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=122,
                 src='foo/file2.js',
@@ -335,7 +335,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name=None
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=127,
                 src='foo/file2.js',
@@ -344,7 +344,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name=None
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=133,
                 src='foo/file2.js',
@@ -353,7 +353,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name='multiply'
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=143,
                 src='foo/file2.js',
@@ -362,7 +362,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name='add'
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=147,
                 src='foo/file2.js',
@@ -371,7 +371,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name='a'
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=149,
                 src='foo/file2.js',
@@ -380,7 +380,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name='b'
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=152,
                 src='foo/file2.js',
@@ -389,7 +389,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name='a'
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=154,
                 src='foo/file2.js',
@@ -398,7 +398,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name='b'
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=157,
                 src='foo/file2.js',
@@ -407,7 +407,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name='c'
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=159,
                 src='foo/file2.js',
@@ -416,7 +416,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name=None
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=165,
                 src='foo/file2.js',
@@ -425,7 +425,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name='e'
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=168,
                 src='foo/file2.js',
@@ -434,7 +434,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name='Raven'
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=174,
                 src='foo/file2.js',
@@ -443,7 +443,7 @@ class ParseSourcemapTest(TestCase):
                 src_id=1,
                 name='captureException'
             ),
-            Token(
+            SourceMapTokenMatch(
                 dst_line=0,
                 dst_col=191,
                 src='foo/file2.js',
@@ -459,10 +459,10 @@ class ParseIndexedSourcemapTest(TestCase):
     # Tests lookups that fall exactly on source map token boundaries
     # https://github.com/mozilla/source-map/blob/master/test/test-source-map-consumer.js#138
     def test_exact_mappings(self):
-        smap_view = view_from_json(indexed_sourcemap_example)
+        smap_view = SourceMapView.from_json_bytes(indexed_sourcemap_example)
 
         # one.js
-        assert smap_view.lookup_token(0, 1) == Token(
+        assert smap_view.lookup(0, 1) == SourceMapTokenMatch(
             dst_line=0,
             dst_col=1,
             src='/the/root/one.js',
@@ -471,7 +471,7 @@ class ParseIndexedSourcemapTest(TestCase):
             src_id=0,
             name=None
         )
-        assert smap_view.lookup_token(0, 18) == Token(
+        assert smap_view.lookup(0, 18) == SourceMapTokenMatch(
             dst_line=0,
             dst_col=18,
             src='/the/root/one.js',
@@ -480,7 +480,7 @@ class ParseIndexedSourcemapTest(TestCase):
             src_id=0,
             name='bar'
         )
-        assert smap_view.lookup_token(0, 28) == Token(
+        assert smap_view.lookup(0, 28) == SourceMapTokenMatch(
             dst_line=0,
             dst_col=28,
             src='/the/root/one.js',
@@ -491,7 +491,7 @@ class ParseIndexedSourcemapTest(TestCase):
         )
 
         # two.js
-        assert smap_view.lookup_token(1, 18) == Token(
+        assert smap_view.lookup(1, 18) == SourceMapTokenMatch(
             dst_line=1,
             dst_col=18,
             src='/the/root/two.js',
@@ -500,7 +500,7 @@ class ParseIndexedSourcemapTest(TestCase):
             src_id=1,
             name='n'
         )
-        assert smap_view.lookup_token(1, 21) == Token(
+        assert smap_view.lookup(1, 21) == SourceMapTokenMatch(
             dst_line=1,
             dst_col=21,
             src='/the/root/two.js',
@@ -509,7 +509,7 @@ class ParseIndexedSourcemapTest(TestCase):
             src_id=1,
             name=None
         )
-        assert smap_view.lookup_token(1, 21) == Token(
+        assert smap_view.lookup(1, 21) == SourceMapTokenMatch(
             dst_line=1,
             dst_col=21,
             src='/the/root/two.js',
@@ -522,10 +522,10 @@ class ParseIndexedSourcemapTest(TestCase):
     # Tests lookups that fall inside source map token boundaries
     # https://github.com/mozilla/source-map/blob/master/test/test-source-map-consumer.js#181
     def test_fuzzy_mapping(self):
-        smap_view = view_from_json(indexed_sourcemap_example)
+        smap_view = SourceMapView.from_json_bytes(indexed_sourcemap_example)
 
         # one.js
-        assert smap_view.lookup_token(0, 20) == Token(
+        assert smap_view.lookup(0, 20) == SourceMapTokenMatch(
             dst_line=0,
             dst_col=18,
             src='/the/root/one.js',
@@ -534,7 +534,7 @@ class ParseIndexedSourcemapTest(TestCase):
             src_id=0,
             name='bar'
         )
-        assert smap_view.lookup_token(0, 30) == Token(
+        assert smap_view.lookup(0, 30) == SourceMapTokenMatch(
             dst_line=0,
             dst_col=28,
             src='/the/root/one.js',
@@ -543,7 +543,7 @@ class ParseIndexedSourcemapTest(TestCase):
             src_id=0,
             name='baz'
         )
-        assert smap_view.lookup_token(1, 12) == Token(
+        assert smap_view.lookup(1, 12) == SourceMapTokenMatch(
             dst_line=1,
             dst_col=9,
             src='/the/root/two.js',

+ 26 - 1
tests/sentry/utils/test_strings.py

@@ -1,15 +1,40 @@
 from __future__ import absolute_import
 
+import sys
 import functools
 
 from sentry.utils.strings import (
-    is_valid_dot_atom, iter_callsign_choices, soft_break, soft_hyphenate, tokens_from_name
+    is_valid_dot_atom, iter_callsign_choices, soft_break, soft_hyphenate,
+    tokens_from_name, codec_lookup
 )
 
 ZWSP = u'\u200b'  # zero width space
 SHY = u'\u00ad'  # soft hyphen
 
 
+def test_codec_lookup():
+    def assert_match(enc, ref=None):
+        if ref is None:
+            ref = enc
+        rv = codec_lookup(enc)
+        if rv is None:
+            assert ref is rv is None
+        else:
+            assert rv.name == ref
+
+    assert codec_lookup('utf-8').name == 'utf-8'
+    assert codec_lookup('utf8').name == 'utf-8'
+    if sys.version_info[:3] >= (2, 7, 12):
+        assert codec_lookup('zlib').name == 'utf-8'
+    assert codec_lookup('utf16').name == 'utf-16'
+    assert codec_lookup('undefined').name == 'utf-8'
+    assert codec_lookup('undefined', default=None) is None
+    assert codec_lookup('undefined', default='latin1').name == 'iso8859-1'
+    if sys.version_info[:3] >= (2, 7, 12):
+        assert codec_lookup('zlib', default='latin1').name == 'iso8859-1'
+    assert codec_lookup('unknownshit', default='latin1').name == 'iso8859-1'
+
+
 def test_soft_break():
     assert soft_break(
         'com.example.package.method(argument).anotherMethod(argument)', 15