# test_utils.py
  1. from codecs import BOM_UTF8
  2. from parso.utils import (
  3. split_lines,
  4. parse_version_string,
  5. python_bytes_to_unicode,
  6. )
  7. import parso
  8. import pytest
  9. @pytest.mark.parametrize(
  10. ('string', 'expected_result', 'keepends'), [
  11. ('asd\r\n', ['asd', ''], False),
  12. ('asd\r\n', ['asd\r\n', ''], True),
  13. ('asd\r', ['asd', ''], False),
  14. ('asd\r', ['asd\r', ''], True),
  15. ('asd\n', ['asd', ''], False),
  16. ('asd\n', ['asd\n', ''], True),
  17. ('asd\r\n\f', ['asd', '\f'], False),
  18. ('asd\r\n\f', ['asd\r\n', '\f'], True),
  19. ('\fasd\r\n', ['\fasd', ''], False),
  20. ('\fasd\r\n', ['\fasd\r\n', ''], True),
  21. ('', [''], False),
  22. ('', [''], True),
  23. ('\n', ['', ''], False),
  24. ('\n', ['\n', ''], True),
  25. ('\r', ['', ''], False),
  26. ('\r', ['\r', ''], True),
  27. # Invalid line breaks
  28. ('a\vb', ['a\vb'], False),
  29. ('a\vb', ['a\vb'], True),
  30. ('\x1C', ['\x1C'], False),
  31. ('\x1C', ['\x1C'], True),
  32. ]
  33. )
  34. def test_split_lines(string, expected_result, keepends):
  35. assert split_lines(string, keepends=keepends) == expected_result
  36. def test_python_bytes_to_unicode_unicode_text():
  37. source = (
  38. b"# vim: fileencoding=utf-8\n"
  39. b"# \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a\n"
  40. )
  41. actual = python_bytes_to_unicode(source)
  42. expected = source.decode('utf-8')
  43. assert actual == expected
  44. def test_utf8_bom():
  45. unicode_bom = BOM_UTF8.decode('utf-8')
  46. module = parso.parse(unicode_bom)
  47. endmarker = module.children[0]
  48. assert endmarker.type == 'endmarker'
  49. assert unicode_bom == endmarker.prefix
  50. module = parso.parse(unicode_bom + 'foo = 1')
  51. expr_stmt = module.children[0]
  52. assert expr_stmt.type == 'expr_stmt'
  53. assert unicode_bom == expr_stmt.get_first_leaf().prefix
  54. @pytest.mark.parametrize(
  55. ('code', 'errors'), [
  56. (b'# coding: wtf-12\nfoo', 'strict'),
  57. (b'# coding: wtf-12\nfoo', 'replace'),
  58. (b'# coding: wtf-12\r\nfoo', 'strict'),
  59. (b'# coding: wtf-12\r\nfoo', 'replace'),
  60. (b'# coding: wtf-12\rfoo', 'strict'),
  61. (b'# coding: wtf-12\rfoo', 'replace'),
  62. ]
  63. )
  64. def test_bytes_to_unicode_failing_encoding(code, errors):
  65. if errors == 'strict':
  66. with pytest.raises(LookupError):
  67. python_bytes_to_unicode(code, errors=errors)
  68. else:
  69. python_bytes_to_unicode(code, errors=errors)
  70. @pytest.mark.parametrize(
  71. ('version_str', 'version'), [
  72. ('3', (3,)),
  73. ('3.6', (3, 6)),
  74. ('3.6.10', (3, 6)),
  75. ('3.10', (3, 10)),
  76. ('3.10a9', (3, 10)),
  77. ('3.10b9', (3, 10)),
  78. ('3.10rc9', (3, 10)),
  79. ]
  80. )
  81. def test_parse_version_string(version_str, version):
  82. parsed_version = parse_version_string(version_str)
  83. if len(version) == 1:
  84. assert parsed_version[0] == version[0]
  85. else:
  86. assert parsed_version == version