123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475 |
- from itertools import zip_longest
- from codecs import BOM_UTF8
- import pytest
- import parso
- unicode_bom = BOM_UTF8.decode('utf-8')
- @pytest.mark.parametrize(('string', 'tokens'), [
- ('', ['']),
- ('#', ['#', '']),
- (' # ', ['# ', '']),
- (' # \n', ['# ', '\n', '']),
- (' # \f\n', ['# ', '\f', '\n', '']),
- (' \n', ['\n', '']),
- (' \n ', ['\n', ' ']),
- (' \f ', ['\f', ' ']),
- (' \f ', ['\f', ' ']),
- (' \r\n', ['\r\n', '']),
- (' \r', ['\r', '']),
- ('\\\n', ['\\\n', '']),
- ('\\\r\n', ['\\\r\n', '']),
- ('\t\t\n\t', ['\n', '\t']),
- ])
- def test_simple_prefix_splitting(string, tokens):
- tree = parso.parse(string)
- leaf = tree.children[0]
- assert leaf.type == 'endmarker'
- parsed_tokens = list(leaf._split_prefix())
- start_pos = (1, 0)
- for pt, expected in zip_longest(parsed_tokens, tokens):
- assert pt.value == expected
- # Calculate the estimated end_pos
- if expected.endswith('\n') or expected.endswith('\r'):
- end_pos = start_pos[0] + 1, 0
- else:
- end_pos = start_pos[0], start_pos[1] + len(expected) + len(pt.spacing)
- # assert start_pos == pt.start_pos
- assert end_pos == pt.end_pos
- start_pos = end_pos
- @pytest.mark.parametrize(('string', 'types'), [
- ('# ', ['comment', 'spacing']),
- ('\r\n', ['newline', 'spacing']),
- ('\f', ['formfeed', 'spacing']),
- ('\\\n', ['backslash', 'spacing']),
- (' \t', ['spacing']),
- (' \t ', ['spacing']),
- (unicode_bom + ' # ', ['bom', 'comment', 'spacing']),
- ])
- def test_prefix_splitting_types(string, types):
- tree = parso.parse(string)
- leaf = tree.children[0]
- assert leaf.type == 'endmarker'
- parsed_tokens = list(leaf._split_prefix())
- assert [t.type for t in parsed_tokens] == types
- def test_utf8_bom():
- tree = parso.parse(unicode_bom + 'a = 1')
- expr_stmt = tree.children[0]
- assert expr_stmt.start_pos == (1, 0)
- tree = parso.parse(unicode_bom + '\n')
- endmarker = tree.children[0]
- parts = list(endmarker._split_prefix())
- assert [p.type for p in parts] == ['bom', 'newline', 'spacing']
- assert [p.start_pos for p in parts] == [(1, 0), (1, 0), (2, 0)]
- assert [p.end_pos for p in parts] == [(1, 0), (2, 0), (2, 0)]
|