test_prefix.py 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475
  1. from itertools import zip_longest
  2. from codecs import BOM_UTF8
  3. import pytest
  4. import parso
  5. unicode_bom = BOM_UTF8.decode('utf-8')
  6. @pytest.mark.parametrize(('string', 'tokens'), [
  7. ('', ['']),
  8. ('#', ['#', '']),
  9. (' # ', ['# ', '']),
  10. (' # \n', ['# ', '\n', '']),
  11. (' # \f\n', ['# ', '\f', '\n', '']),
  12. (' \n', ['\n', '']),
  13. (' \n ', ['\n', ' ']),
  14. (' \f ', ['\f', ' ']),
  15. (' \f ', ['\f', ' ']),
  16. (' \r\n', ['\r\n', '']),
  17. (' \r', ['\r', '']),
  18. ('\\\n', ['\\\n', '']),
  19. ('\\\r\n', ['\\\r\n', '']),
  20. ('\t\t\n\t', ['\n', '\t']),
  21. ])
  22. def test_simple_prefix_splitting(string, tokens):
  23. tree = parso.parse(string)
  24. leaf = tree.children[0]
  25. assert leaf.type == 'endmarker'
  26. parsed_tokens = list(leaf._split_prefix())
  27. start_pos = (1, 0)
  28. for pt, expected in zip_longest(parsed_tokens, tokens):
  29. assert pt.value == expected
  30. # Calculate the estimated end_pos
  31. if expected.endswith('\n') or expected.endswith('\r'):
  32. end_pos = start_pos[0] + 1, 0
  33. else:
  34. end_pos = start_pos[0], start_pos[1] + len(expected) + len(pt.spacing)
  35. # assert start_pos == pt.start_pos
  36. assert end_pos == pt.end_pos
  37. start_pos = end_pos
  38. @pytest.mark.parametrize(('string', 'types'), [
  39. ('# ', ['comment', 'spacing']),
  40. ('\r\n', ['newline', 'spacing']),
  41. ('\f', ['formfeed', 'spacing']),
  42. ('\\\n', ['backslash', 'spacing']),
  43. (' \t', ['spacing']),
  44. (' \t ', ['spacing']),
  45. (unicode_bom + ' # ', ['bom', 'comment', 'spacing']),
  46. ])
  47. def test_prefix_splitting_types(string, types):
  48. tree = parso.parse(string)
  49. leaf = tree.children[0]
  50. assert leaf.type == 'endmarker'
  51. parsed_tokens = list(leaf._split_prefix())
  52. assert [t.type for t in parsed_tokens] == types
  53. def test_utf8_bom():
  54. tree = parso.parse(unicode_bom + 'a = 1')
  55. expr_stmt = tree.children[0]
  56. assert expr_stmt.start_pos == (1, 0)
  57. tree = parso.parse(unicode_bom + '\n')
  58. endmarker = tree.children[0]
  59. parts = list(endmarker._split_prefix())
  60. assert [p.type for p in parts] == ['bom', 'newline', 'spacing']
  61. assert [p.start_pos for p in parts] == [(1, 0), (1, 0), (2, 0)]
  62. assert [p.end_pos for p in parts] == [(1, 0), (2, 0), (2, 0)]