# -*- coding: utf-8 -*-
"""Round-trip, overflow, writer and unicode-reader tests for ``cyson``."""

from __future__ import print_function, absolute_import, division

import io
import math
import pytest
import six
import sys

from functools import partial

from cyson import (
    dumps, loads, YsonInt64, YsonUInt64, UInt, Writer, OutputStream,
    UnicodeReader,
)


# Python 2/3 compatibility aliases used by the helpers and tests below.
if six.PY2:
    NativeUInt = long  # noqa: F821
elif six.PY3:
    NativeUInt = UInt
    unicode = str
    long = int
else:
    raise RuntimeError('Unsupported Python version')


def canonize(value, as_unicode=False):
    """Normalize ``value`` into the shape the tests compare against.

    Lists and tuples become lists, dict keys and values are canonized
    recursively, and strings are converted to a single type: bytes by
    default, or unicode when ``as_unicode`` is true. Anything else is
    returned untouched.
    """
    recurse = partial(canonize, as_unicode=as_unicode)

    if isinstance(value, dict):
        return {recurse(key): recurse(value[key]) for key in value}
    if isinstance(value, (list, tuple)):
        return [recurse(item) for item in value]

    if as_unicode:
        if isinstance(value, bytes):
            return value.decode('utf8')
    elif isinstance(value, unicode):
        return value.encode('utf8')

    return value


def switch_string_type(string):
    """Return ``string`` converted to the other string type via UTF-8.

    Bytes are decoded to unicode and unicode is encoded to bytes.
    Raises ``TypeError`` for any other argument.
    """
    if not isinstance(string, (bytes, unicode)):
        raise TypeError('expected unicode or bytes, got {!r}'.format(string))

    # The bytes check comes first: on Python 2 the native ``str`` is bytes.
    if isinstance(string, bytes):
        return string.decode('utf8')
    return string.encode('utf8')


def coerce(obj, to, via=None):
    """Return ``obj`` unchanged if it is an instance of ``to``.

    Otherwise convert it by calling ``via`` (or ``to`` when ``via`` is
    not given) with ``obj`` as the only argument.
    """
    if isinstance(obj, to):
        return obj

    converter = to if via is None else via
    return converter(obj)


SKIP_PY3 = pytest.mark.skipif(six.PY3, reason='Makes no sense for Python3')

# Every test that depends on the output format runs once per format name.
FORMATS = ['binary', 'text', 'pretty']
each_format = pytest.mark.parametrize('format', FORMATS)


# numpy scalars are left out of the cases on Python 3 under Windows.
if not (six.PY3 and sys.platform == 'win32'):
    import numpy as np

    NUMPY_CASES = [
        # numpy int
        np.int8(2 ** 7 - 1),
        np.int16(2 ** 15 - 1),
        np.int32(2 ** 31 - 1),
        np.int64(2 ** 63 - 1),
        # numpy uint
        np.uint8(2 ** 8 - 1),
        np.uint16(2 ** 16 - 1),
        np.uint32(2 ** 32 - 1),
        np.uint64(2 ** 64 - 1),
        # numpy float
        np.float16(100.0),
        np.float32(100.0),
        np.float64(100.0),
    ]
else:
    NUMPY_CASES = []


CASES = [
    # NoneType
    None,
    # boolean
    True,
    False,
    # int
    0,
    1,
    -1,
    int(2 ** 63 - 1),
    int(-2 ** 63),
    # float
    0.0,
    100.0,
    -100.0,
    float('inf'),
    float('-inf'),
    # bytes
    b'',
    b'hello',
    u'Привет'.encode('utf8'),
    # unicode
    u'',
    u'hello',
    u'Привет',
    # list
    [],
    [0],
    [1, 'hello'],
    [17, 'q'] * 100,
    [b'bytes'],
    # tuple
    (),
    (0,),
    (1, 'hello'),
    (17, 'q') * 100,
    (b'bytes',),
    # dict
    {},
    {'a': 'b'},
    {'a': 17},
    {'a': [1, 2, 3]},
    {b'a': 1, u'b': b'a'}
] + NUMPY_CASES


@each_format
@pytest.mark.parametrize('value', CASES)
def test_roundtrip(value, format):
    """Dump then load each case and compare with its canonized form.

    Also checks that the format name gives the same output whether it
    is passed as bytes or as unicode.
    """
    payload = dumps(value, format)
    restored = loads(payload)

    assert payload == dumps(value, switch_string_type(format))
    assert restored == canonize(value)


# NOTE: roundtrip test doesn't work for NaN (NaN != NaN)
@each_format
def test_nan(format):
    """Dump then load NaN and check the result with ``math.isnan``."""
    payload = dumps(float('nan'), format)
    restored = loads(payload)

    assert payload == dumps(float('nan'), switch_string_type(format))
    assert math.isnan(restored)


@SKIP_PY3
@each_format
@pytest.mark.parametrize(
    'value',
    list(map(long, (0, 1, 2 ** 63, 2 ** 64 - 1)))
)
def test_long_roundtrip(value, format):
    """Dump then load Python 2 ``long`` values and compare for equality."""
    payload = dumps(value, format)
    restored = loads(payload)

    assert payload == dumps(value, switch_string_type(format))
    assert restored == value


@pytest.mark.parametrize(
    'value',
    list(map(NativeUInt, (0, 111, 2 ** 63, 2 ** 64 - 1)))
)
@each_format
def test_readwrite_uint64(value, format):
    """Check that ``YsonUInt64`` and native unsigned values dump alike.

    The loaded value must have exactly the ``NativeUInt`` type.
    """
    wrapped_dump = dumps(coerce(value, YsonUInt64), format=format)
    restored = loads(wrapped_dump)

    assert type(value) is NativeUInt
    assert type(restored) is NativeUInt
    assert dumps(value, format=format) == wrapped_dump


@pytest.mark.parametrize('value', [int(-2 ** 63), -111, 0, 111, int(2 ** 63 - 1)])
@each_format
def test_readwrite_int64(value, format):
    """Check that ``YsonInt64`` and plain ``int`` values dump alike.

    The loaded value must have exactly the ``int`` type.
    """
    wrapped_dump = dumps(YsonInt64(value), format=format)
    restored = loads(wrapped_dump)

    assert type(value) is int
    assert type(restored) is int
    assert dumps(value, format=format) == wrapped_dump


@SKIP_PY3
def test_long_overflow():
    """Dumping a ``long`` of -1 or 2**64 must raise ``OverflowError``."""
    for out_of_range in (long(-1), long(2 ** 64)):
        with pytest.raises(OverflowError):
            dumps(out_of_range)


@pytest.mark.parametrize('value', [2 ** 63, -2 ** 63 - 1])
def test_int64_overflow(value):
    """Values just outside the signed 64-bit range must overflow.

    Wrapping in ``YsonInt64`` and dumping must raise; on Python 3 dumping
    the bare integer must raise as well.
    """
    with pytest.raises(OverflowError):
        dumps(YsonInt64(value))

    if not six.PY3:
        return

    with pytest.raises(OverflowError):
        dumps(value)


@pytest.mark.parametrize('value', [2 ** 64, 2 ** 100])
def test_uint64_overflow(value):
    """Wrapping a too-large value in ``YsonUInt64`` and dumping must raise."""
    with pytest.raises(OverflowError):
        dumps(YsonUInt64(value))


@each_format
def test_force_write_sequence(format):
    """``Writer.list`` output for a list-like object must match ``dumps``.

    The object passed in only provides ``__getitem__`` and ``__len__``.
    """
    class Sequence(object):
        def __init__(self, items):
            self._items = items

        def __len__(self):
            return len(self._items)

        def __getitem__(self, position):
            return self._items[position]

    plain_list = [1, 1.1, None, b'xyz']
    buffer = io.BytesIO()

    writer = Writer(OutputStream.from_file(buffer), format=format)
    writer.begin_stream()
    writer.list(Sequence(plain_list))
    writer.end_stream()

    assert buffer.getvalue() == dumps(plain_list, format)


@each_format
def test_force_write_mapping(format):
    """``Writer.map`` output for a dict-like object must match ``dumps``.

    The object passed in only provides ``__getitem__`` and ``keys``.
    """
    class Mapping(object):
        def __init__(self, data):
            self._data = data

        def keys(self):
            return self._data.keys()

        def __getitem__(self, name):
            return self._data[name]

    plain_dict = {b'a': 1, b'b': 1.1, b'c': None, b'd': b'some'}
    buffer = io.BytesIO()

    writer = Writer(OutputStream.from_file(buffer), format=format)
    writer.begin_stream()
    writer.map(Mapping(plain_dict))
    writer.end_stream()

    assert buffer.getvalue() == dumps(plain_dict, format)


@each_format
@pytest.mark.parametrize('value', CASES)
def test_unicode_reader(value, format):
    """Loading with ``UnicodeReader`` must match the unicode canonization."""
    wanted = canonize(value, as_unicode=True)
    actual = loads(dumps(value, format), UnicodeReader)

    assert wanted == actual


def test_unicode_reader_raises_unicode_decode_error():
    """Bytes that are not valid UTF-8 must raise ``UnicodeDecodeError``."""
    invalid_utf8 = b'\x80\x81'

    with pytest.raises(UnicodeDecodeError):
        loads(dumps(invalid_utf8, format='binary'), UnicodeReader)


def test_unicode_reader_decodes_object_with_attributes():
    """A map whose value carries attributes must load as plain unicode."""
    text_yson = b'{"a" = "b"; "c" = <"foo" = "bar">"d"}'
    wanted = {u"a": u"b", u"c": u"d"}

    assert loads(text_yson, UnicodeReader) == wanted