from nose.tools import assert_true, assert_false, assert_equals from gixy.core.regexp import Regexp ''' CATEGORIES: sre_parse.CATEGORY_SPACE sre_parse.CATEGORY_NOT_SPACE sre_parse.CATEGORY_DIGIT sre_parse.CATEGORY_NOT_DIGIT sre_parse.CATEGORY_WORD sre_parse.CATEGORY_NOT_WORD ANY ''' def test_positive_contains(): cases = ( (r'[a-z]', 'a'), (r'[a-z]*', 'a'), (r'[a-z]*?', 'a'), (r'[a-z]+?', 'a'), (r'[a-z]', 'z'), (r'(?:a|b)', 'b'), (r'(/|:|[a-z])', 'g'), (r'[^a-z]', '/'), (r'[^a-z]', '\n'), (r'[^0]', '9'), (r'[^0-2]', '3'), (r'[^0123a-z]', '9'), (r'\s', '\x20'), (r'[^\s]', 'a'), (r'\d', '1'), (r'[^\d]', 'b'), (r'\w', '_'), (r'[^\w]', '\n'), (r'\W', '\n'), (r'[^\W]', 'a'), (r'.', 'a') ) for case in cases: regexp, char = case yield check_positive_contain, regexp, char def test_negative_contains(): cases = ( ('[a-z]', '1'), ('[a-z]*', '2'), ('[a-z]*?', '3'), ('[a-z]+?', '4'), ('[a-z]', '\n'), ('(?:a|b)', 'c'), ('(/|:|[a-z])', '\n'), ('[^a-z]', 'a'), ('[^0]', '0'), ('[^0-2]', '0'), ('[^0123a-z]', 'z'), (r'\s', 'a'), (r'[^\s]', '\n'), (r'\d', 'f'), (r'[^\d]', '2'), (r'\w', '\n'), (r'[^\w]', '_'), (r'\W', 'a'), (r'[^\W]', '\n'), (r'.', '\n') ) for case in cases: regexp, char = case yield check_negative_contain, regexp, char def test_groups_names(): cases = ( ('foo', [0]), ('(1)(2)(?:3)', [0, 1, 2]), ('(1)((2)|(?:3))', [0, 1, 2, 3]), ("(?'pcre_7'1as)(?P(?2)|(?:3))", [0, 1, 2, 3, 'pcre_7', 'outer', 'inner']), ('/proxy/(?.*)$', [0, 1, 'proxy']) ) for case in cases: regexp, groups = case yield check_groups_names, regexp, groups def test_to_string(): cases = ( (r'foo', 'foo'), (r'(1)(2)(?:3)', '(1)(2)(?:3)'), (r'(1)((2)|(?:3))', '(1)((?:(2)|(?:3)))'), (r'\w|1|3-5|[a-z]', '(?:[\w]|1|3\\-5|[a-z])'), (r'(1|(?:3)|([4-6]))', '((?:1|(?:3)|([4-6])))'), (r'(1|(?:3)|(?P[4-6]))', '((?:1|(?:3)|([4-6])))'), (r'^sss', '^sss'), (r'(^bb|11)$', '((?:^bb|11))$'), (r'(http|https)', '(http(?:|s))'), (r'1*', '1*'), (r'1*?', '1*?'), (r'1+', '1+'), ) for case in cases: regexp, string = case yield check_to_string, regexp, string def test_positive_startswith(): cases = ( (r'foo', 'q', False), (r'foo', 'f', True), (r'^foo', 'f', False), (r'(^foo)', 'f', False), (r'(^foo)', 'f', True), (r'(^foo|g)', 'f', True), (r'(^foo|g)', 'g', True), (r'(^foo|g)', 'q', False), (r'^[^/]+', '\n', True), (r'/[^/]+', '/', True), (r'((a))', 'a', False), (r'((a))', 'b', False), (r'^[a-z]{0}0', '0', False), (r'^[a-z]{1}0', 'a', False), ) for case in cases: regexp, check, strict = case yield check_positive_startswith, regexp, check, strict def test_negative_startswith(): cases = ( (r'foo', '\n', False), (r'foo', 'o', True), (r'^foo', 'o', False), (r'(^foo)', 'q', False), (r'(^foo)', 'q', True), (r'(^foo|g)', 'q', True), (r'(^foo|g)', 'o', True), (r'(^foo|g)', '\n', False), (r'^[^/]+', '/', True), (r'/[^/]+', 'a', True), (r'((abc)|(ss))', 'b', True), (r'^[a-z]{0}0', 'a', False), (r'^[a-z]{0}0', 'g', False), ) for case in cases: regexp, check, strict = case yield check_negative_startswith, regexp, check, strict def test_positive_must_contain(): cases = ( (r'abc', 'a'), (r'abc', 'b'), (r'abc', 'c'), (r'3+', '3'), (r'[0]', '0'), (r'([0])', '0'), (r'(?:[0])', '0'), (r'(?:[0])|0|((((0))))', '0'), ) for case in cases: regexp, char = case yield check_positive_must_contain, regexp, char def test_negative_must_contain(): cases = ( (r'[a-z]', '1'), (r'2{0}1', '2'), (r'3?', '3'), (r'3*', '3'), (r'3*?', '3'), (r'3+a', 'b'), (r'[a-z]', 'a'), (r'(?:a|b)', 'a'), (r'(?:a|b)', 'b'), (r'(/|:|[a-z])', '/'), (r'(/|:|[a-z])', 'z'), (r'[^a-z]', '\n'), (r'[^0]', '0'), (r'[^0-2]', '0'), (r'[^0123a-z]', 'z'), (r'\s', '\x20'), (r'[^\s]', '\n'), (r'\d', '3'), (r'[^\d]', 'a'), (r'\w', 'a'), (r'[^\w]', '\n'), (r'\W', '\n'), (r'[^\W]', 'a'), (r'.', '\n') ) for case in cases: regexp, char = case yield check_negative_must_contain, regexp, char def test_positive_must_startswith(): cases = ( (r'foo', 'f', True), (r'^foo', 'f', False), (r'(^foo)', 'f', True), (r'^((a))', 'a', False), (r'((a))', 'a', True), (r'^[a-z]{0}0', '0', False), (r'^a{1}0', 'a', False), ) for case in cases: regexp, check, strict = case yield check_positive_must_startswith, regexp, check, strict def test_negative_must_startswith(): cases = ( (r'foo', 'o', False), (r'^foo', 'o', False), (r'(^foo)', 'o', False), (r'[a-z]', '1', True), (r'[a-z]', 'a', True), (r'/[^/]+', 'a', True), (r'3?', '3', True), (r'3*', '3', True), (r'3*?', '3', True), (r'3+a', 'b', True), (r'^((a))', 'b', False), (r'((a))', 'a', False), (r'^a{0}0', 'a', False), ) for case in cases: regexp, check, strict = case yield check_negative_must_startswith, regexp, check, strict def test_generate(): cases = ( (r'foo', ['foo']), (r'^sss', ['^sss']), (r'(1)(2)(3)', ['123']), (r'(1)((2)|(?:3))', ['12', '13']), (r'(^1?2?|aa/)', ['^', '^1', '^2', '^12', 'aa/']), (r'^https?://yandex.ru', ['^http://yandex|ru', '^https://yandex|ru']), (r'(^bb|11)$', ['^bb$', '11$']), (r'(http|https)', ['http', 'https']), (r'1*', ['', '11111']), (r'1*?', ['', '11111']), (r'1[0]?2', ['102', '12']), (r'1[0]2', ['102']), (r'1+', ['11111']), (r'[^/]?', ['', '|']), (r'^http://(foo|bar)|baz', ['^http://foo', '^http://bar', 'baz']), (r'[^\x00-\x7b|\x7e-\xff]', ['\x7d']), (r'(a|b|c)', ['a', 'b', 'c']), (r'[xyz]', ['x', 'y', 'z']) ) for case in cases: regexp, values = case yield check_generate, regexp, values def test_strict_generate(): reg = Regexp('^foo|bar', strict=True) assert_equals(sorted(reg.generate('|', anchored=True)), sorted(['^foo', '^bar'])) def test_gen_anchor(): reg = Regexp('^some$') val = next(reg.generate('', anchored=False)) assert_equals(val, 'some') reg = Regexp('^some$') val = next(reg.generate('', anchored=True)) assert_equals(val, '^some$') reg = Regexp('^some$', strict=True) val = next(reg.generate('', anchored=False)) assert_equals(val, 'some') reg = Regexp('^some$', strict=True) val = next(reg.generate('', anchored=True)) assert_equals(val, '^some$') def test_group_can_contains(): source = '/some/(?P[^/:.]+)/' reg = Regexp(source) assert_true(reg.can_contain('\n'), 'Whole regex "{src}" can contains {sym!r}'.format(src=source, sym='\\n')) assert_true(reg.group(0).can_contain('\n'), 'Group 0 from regex "{src}" can contains {sym!r}'.format(src=source, sym='\\n')) assert_true(reg.group('action').can_contain('\n'), 'Group "action" from regex "{src}" can contains {sym!r}'.format(src=source, sym='\\n')) assert_true(reg.group(1).can_contain('\n'), 'Group 1 from regex "{src}" can contains {sym!r}'.format(src=source, sym='\\n')) assert_false(reg.group('action').can_contain('/'), 'Group "action" from regex "{src}" CAN\'T (!) contain {sym!r}'.format(src=source, sym='/')) def check_positive_contain(regexp, char): reg = Regexp(regexp, case_sensitive=True) assert_true(reg.can_contain(char), '{reg!r} should contain {chr!r}'.format(reg=regexp, chr=char)) reg = Regexp(regexp, case_sensitive=False) char = char.upper() assert_true(reg.can_contain(char), '{reg!r} (case insensitive) should contain {chr!r}'.format(reg=regexp, chr=char)) def check_negative_contain(regexp, char): reg = Regexp(regexp, case_sensitive=True) assert_false(reg.can_contain(char), '{reg!r} should not contain {chr!r}'.format(reg=regexp, chr=char)) reg = Regexp(regexp, case_sensitive=False) char = char.upper() assert_false(reg.can_contain(char), '{reg!r} (case insensitive) should not contain {chr!r}'.format(reg=regexp, chr=char)) def check_positive_startswith(regexp, char, strict): reg = Regexp(regexp, case_sensitive=True, strict=strict) assert_true(reg.can_startswith(char), '{reg!r} can start\'s with {chr!r}'.format(reg=regexp, chr=char)) reg = Regexp(regexp, case_sensitive=False, strict=strict) char = char.upper() assert_true(reg.can_startswith(char), '{reg!r} (case insensitive) can start\'s with {chr!r}'.format(reg=regexp, chr=char)) def check_negative_startswith(regexp, char, strict): reg = Regexp(regexp, case_sensitive=True, strict=strict) assert_false(reg.can_startswith(char), '{reg!r} can\'t start\'s with {chr!r}'.format(reg=regexp, chr=char)) reg = Regexp(regexp, case_sensitive=False, strict=strict) char = char.upper() assert_false(reg.can_startswith(char), '{reg!r} (case insensitive) can\'t start\'s with {chr!r}'.format(reg=regexp, chr=char)) def check_groups_names(regexp, groups): reg = Regexp(regexp) assert_equals(set(reg.groups.keys()), set(groups)) def check_to_string(regexp, string): reg = Regexp(regexp) assert_equals(str(reg), string) def check_positive_must_contain(regexp, char): reg = Regexp(regexp, case_sensitive=True) assert_true(reg.must_contain(char), '{reg!r} must contain with {chr!r}'.format(reg=regexp, chr=char)) reg = Regexp(regexp, case_sensitive=False) char = char.upper() assert_true(reg.must_contain(char), '{reg!r} (case insensitive) must contain with {chr!r}'.format(reg=regexp, chr=char)) def check_negative_must_contain(regexp, char): reg = Regexp(regexp, case_sensitive=True) assert_false(reg.must_contain(char), '{reg!r} must NOT contain with {chr!r}'.format(reg=regexp, chr=char)) reg = Regexp(regexp, case_sensitive=False) char = char.upper() assert_false(reg.must_contain(char), '{reg!r} (case insensitive) must NOT contain with {chr!r}'.format(reg=regexp, chr=char)) def check_positive_must_startswith(regexp, char, strict): reg = Regexp(regexp, case_sensitive=True, strict=strict) assert_true(reg.must_startswith(char), '{reg!r} MUST start\'s with {chr!r}'.format(reg=regexp, chr=char)) reg = Regexp(regexp, case_sensitive=False, strict=strict) char = char.upper() assert_true(reg.must_startswith(char), '{reg!r} (case insensitive) MUST start\'s with {chr!r}'.format(reg=regexp, chr=char)) def check_negative_must_startswith(regexp, char, strict): reg = Regexp(regexp, case_sensitive=True, strict=strict) assert_false(reg.must_startswith(char), '{reg!r} MUST NOT start\'s with {chr!r}'.format(reg=regexp, chr=char)) reg = Regexp(regexp, case_sensitive=False, strict=strict) char = char.upper() assert_false(reg.must_startswith(char), '{reg!r} (case insensitive) MUST NOT start\'s with {chr!r}'.format(reg=regexp, chr=char)) def check_generate(regexp, values): reg = Regexp(regexp) assert_equals(sorted(reg.generate('|', anchored=True)), sorted(values))