test_regexp.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402
  1. from nose.tools import assert_true, assert_false, assert_equals
  2. from gixy.core.regexp import Regexp
  3. '''
  4. CATEGORIES:
  5. sre_parse.CATEGORY_SPACE
  6. sre_parse.CATEGORY_NOT_SPACE
  7. sre_parse.CATEGORY_DIGIT
  8. sre_parse.CATEGORY_NOT_DIGIT
  9. sre_parse.CATEGORY_WORD
  10. sre_parse.CATEGORY_NOT_WORD
  11. ANY
  12. '''
  13. def test_positive_contains():
  14. cases = (
  15. (r'[a-z]', 'a'),
  16. (r'[a-z]*', 'a'),
  17. (r'[a-z]*?', 'a'),
  18. (r'[a-z]+?', 'a'),
  19. (r'[a-z]', 'z'),
  20. (r'(?:a|b)', 'b'),
  21. (r'(/|:|[a-z])', 'g'),
  22. (r'[^a-z]', '/'),
  23. (r'[^a-z]', '\n'),
  24. (r'[^0]', '9'),
  25. (r'[^0-2]', '3'),
  26. (r'[^0123a-z]', '9'),
  27. (r'\s', '\x20'),
  28. (r'[^\s]', 'a'),
  29. (r'\d', '1'),
  30. (r'[^\d]', 'b'),
  31. (r'\w', '_'),
  32. (r'[^\w]', '\n'),
  33. (r'\W', '\n'),
  34. (r'[^\W]', 'a'),
  35. (r'.', 'a')
  36. )
  37. for case in cases:
  38. regexp, char = case
  39. yield check_positive_contain, regexp, char
  40. def test_negative_contains():
  41. cases = (
  42. ('[a-z]', '1'),
  43. ('[a-z]*', '2'),
  44. ('[a-z]*?', '3'),
  45. ('[a-z]+?', '4'),
  46. ('[a-z]', '\n'),
  47. ('(?:a|b)', 'c'),
  48. ('(/|:|[a-z])', '\n'),
  49. ('[^a-z]', 'a'),
  50. ('[^0]', '0'),
  51. ('[^0-2]', '0'),
  52. ('[^0123a-z]', 'z'),
  53. (r'\s', 'a'),
  54. (r'[^\s]', '\n'),
  55. (r'\d', 'f'),
  56. (r'[^\d]', '2'),
  57. (r'\w', '\n'),
  58. (r'[^\w]', '_'),
  59. (r'\W', 'a'),
  60. (r'[^\W]', '\n'),
  61. (r'.', '\n')
  62. )
  63. for case in cases:
  64. regexp, char = case
  65. yield check_negative_contain, regexp, char
  66. def test_groups_names():
  67. cases = (
  68. ('foo', [0]),
  69. ('(1)(2)(?:3)', [0, 1, 2]),
  70. ('(1)((2)|(?:3))', [0, 1, 2, 3]),
  71. ("(?'pcre_7'1as)(?P<outer>(?<inner>2)|(?:3))", [0, 1, 2, 3, 'pcre_7', 'outer', 'inner']),
  72. ('/proxy/(?<proxy>.*)$', [0, 1, 'proxy'])
  73. )
  74. for case in cases:
  75. regexp, groups = case
  76. yield check_groups_names, regexp, groups
  77. def test_to_string():
  78. cases = (
  79. (r'foo', 'foo'),
  80. (r'(1)(2)(?:3)', '(1)(2)(?:3)'),
  81. (r'(1)((2)|(?:3))', '(1)((?:(2)|(?:3)))'),
  82. (r'\w|1|3-5|[a-z]', '(?:[\w]|1|3\\-5|[a-z])'),
  83. (r'(1|(?:3)|([4-6]))', '((?:1|(?:3)|([4-6])))'),
  84. (r'(1|(?:3)|(?P<aaa>[4-6]))', '((?:1|(?:3)|([4-6])))'),
  85. (r'^sss', '^sss'),
  86. (r'(^bb|11)$', '((?:^bb|11))$'),
  87. (r'(http|https)', '(http(?:|s))'),
  88. (r'1*', '1*'),
  89. (r'1*?', '1*?'),
  90. (r'1+', '1+'),
  91. )
  92. for case in cases:
  93. regexp, string = case
  94. yield check_to_string, regexp, string
  95. def test_positive_startswith():
  96. cases = (
  97. (r'foo', 'q', False),
  98. (r'foo', 'f', True),
  99. (r'^foo', 'f', False),
  100. (r'(^foo)', 'f', False),
  101. (r'(^foo)', 'f', True),
  102. (r'(^foo|g)', 'f', True),
  103. (r'(^foo|g)', 'g', True),
  104. (r'(^foo|g)', 'q', False),
  105. (r'^[^/]+', '\n', True),
  106. (r'/[^/]+', '/', True),
  107. (r'((a))', 'a', False),
  108. (r'((a))', 'b', False),
  109. (r'^[a-z]{0}0', '0', False),
  110. (r'^[a-z]{1}0', 'a', False),
  111. )
  112. for case in cases:
  113. regexp, check, strict = case
  114. yield check_positive_startswith, regexp, check, strict
  115. def test_negative_startswith():
  116. cases = (
  117. (r'foo', '\n', False),
  118. (r'foo', 'o', True),
  119. (r'^foo', 'o', False),
  120. (r'(^foo)', 'q', False),
  121. (r'(^foo)', 'q', True),
  122. (r'(^foo|g)', 'q', True),
  123. (r'(^foo|g)', 'o', True),
  124. (r'(^foo|g)', '\n', False),
  125. (r'^[^/]+', '/', True),
  126. (r'/[^/]+', 'a', True),
  127. (r'((abc)|(ss))', 'b', True),
  128. (r'^[a-z]{0}0', 'a', False),
  129. (r'^[a-z]{0}0', 'g', False),
  130. )
  131. for case in cases:
  132. regexp, check, strict = case
  133. yield check_negative_startswith, regexp, check, strict
  134. def test_positive_must_contain():
  135. cases = (
  136. (r'abc', 'a'),
  137. (r'abc', 'b'),
  138. (r'abc', 'c'),
  139. (r'3+', '3'),
  140. (r'[0]', '0'),
  141. (r'([0])', '0'),
  142. (r'(?:[0])', '0'),
  143. (r'(?:[0])|0|((((0))))', '0'),
  144. )
  145. for case in cases:
  146. regexp, char = case
  147. yield check_positive_must_contain, regexp, char
  148. def test_negative_must_contain():
  149. cases = (
  150. (r'[a-z]', '1'),
  151. (r'2{0}1', '2'),
  152. (r'3?', '3'),
  153. (r'3*', '3'),
  154. (r'3*?', '3'),
  155. (r'3+a', 'b'),
  156. (r'[a-z]', 'a'),
  157. (r'(?:a|b)', 'a'),
  158. (r'(?:a|b)', 'b'),
  159. (r'(/|:|[a-z])', '/'),
  160. (r'(/|:|[a-z])', 'z'),
  161. (r'[^a-z]', '\n'),
  162. (r'[^0]', '0'),
  163. (r'[^0-2]', '0'),
  164. (r'[^0123a-z]', 'z'),
  165. (r'\s', '\x20'),
  166. (r'[^\s]', '\n'),
  167. (r'\d', '3'),
  168. (r'[^\d]', 'a'),
  169. (r'\w', 'a'),
  170. (r'[^\w]', '\n'),
  171. (r'\W', '\n'),
  172. (r'[^\W]', 'a'),
  173. (r'.', '\n')
  174. )
  175. for case in cases:
  176. regexp, char = case
  177. yield check_negative_must_contain, regexp, char
  178. def test_positive_must_startswith():
  179. cases = (
  180. (r'foo', 'f', True),
  181. (r'^foo', 'f', False),
  182. (r'(^foo)', 'f', True),
  183. (r'^((a))', 'a', False),
  184. (r'((a))', 'a', True),
  185. (r'^[a-z]{0}0', '0', False),
  186. (r'^a{1}0', 'a', False),
  187. )
  188. for case in cases:
  189. regexp, check, strict = case
  190. yield check_positive_must_startswith, regexp, check, strict
  191. def test_negative_must_startswith():
  192. cases = (
  193. (r'foo', 'o', False),
  194. (r'^foo', 'o', False),
  195. (r'(^foo)', 'o', False),
  196. (r'[a-z]', '1', True),
  197. (r'[a-z]', 'a', True),
  198. (r'/[^/]+', 'a', True),
  199. (r'3?', '3', True),
  200. (r'3*', '3', True),
  201. (r'3*?', '3', True),
  202. (r'3+a', 'b', True),
  203. (r'^((a))', 'b', False),
  204. (r'((a))', 'a', False),
  205. (r'^a{0}0', 'a', False),
  206. )
  207. for case in cases:
  208. regexp, check, strict = case
  209. yield check_negative_must_startswith, regexp, check, strict
  210. def test_generate():
  211. cases = (
  212. (r'foo', ['foo']),
  213. (r'^sss', ['^sss']),
  214. (r'(1)(2)(3)', ['123']),
  215. (r'(1)((2)|(?:3))', ['12', '13']),
  216. (r'(^1?2?|aa/)', ['^', '^1', '^2', '^12', 'aa/']),
  217. (r'^https?://yandex.ru', ['^http://yandex|ru', '^https://yandex|ru']),
  218. (r'(^bb|11)$', ['^bb$', '11$']),
  219. (r'(http|https)', ['http', 'https']),
  220. (r'1*', ['', '11111']),
  221. (r'1*?', ['', '11111']),
  222. (r'1[0]?2', ['102', '12']),
  223. (r'1[0]2', ['102']),
  224. (r'1+', ['11111']),
  225. (r'[^/]?', ['', '|']),
  226. (r'^http://(foo|bar)|baz', ['^http://foo', '^http://bar', 'baz']),
  227. (r'[^\x00-\x7b|\x7e-\xff]', ['\x7d']),
  228. (r'(a|b|c)', ['a', 'b', 'c']),
  229. (r'[xyz]', ['x', 'y', 'z'])
  230. )
  231. for case in cases:
  232. regexp, values = case
  233. yield check_generate, regexp, values
  234. def test_strict_generate():
  235. reg = Regexp('^foo|bar', strict=True)
  236. assert_equals(sorted(reg.generate('|', anchored=True)), sorted(['^foo', '^bar']))
  237. def test_gen_anchor():
  238. reg = Regexp('^some$')
  239. val = next(reg.generate('', anchored=False))
  240. assert_equals(val, 'some')
  241. reg = Regexp('^some$')
  242. val = next(reg.generate('', anchored=True))
  243. assert_equals(val, '^some$')
  244. reg = Regexp('^some$', strict=True)
  245. val = next(reg.generate('', anchored=False))
  246. assert_equals(val, 'some')
  247. reg = Regexp('^some$', strict=True)
  248. val = next(reg.generate('', anchored=True))
  249. assert_equals(val, '^some$')
  250. def test_group_can_contains():
  251. source = '/some/(?P<action>[^/:.]+)/'
  252. reg = Regexp(source)
  253. assert_true(reg.can_contain('\n'),
  254. 'Whole regex "{src}" can contains {sym!r}'.format(src=source, sym='\\n'))
  255. assert_true(reg.group(0).can_contain('\n'),
  256. 'Group 0 from regex "{src}" can contains {sym!r}'.format(src=source, sym='\\n'))
  257. assert_true(reg.group('action').can_contain('\n'),
  258. 'Group "action" from regex "{src}" can contains {sym!r}'.format(src=source, sym='\\n'))
  259. assert_true(reg.group(1).can_contain('\n'),
  260. 'Group 1 from regex "{src}" can contains {sym!r}'.format(src=source, sym='\\n'))
  261. assert_false(reg.group('action').can_contain('/'),
  262. 'Group "action" from regex "{src}" CAN\'T (!) contain {sym!r}'.format(src=source, sym='/'))
  263. def check_positive_contain(regexp, char):
  264. reg = Regexp(regexp, case_sensitive=True)
  265. assert_true(reg.can_contain(char),
  266. '{reg!r} should contain {chr!r}'.format(reg=regexp, chr=char))
  267. reg = Regexp(regexp, case_sensitive=False)
  268. char = char.upper()
  269. assert_true(reg.can_contain(char),
  270. '{reg!r} (case insensitive) should contain {chr!r}'.format(reg=regexp, chr=char))
  271. def check_negative_contain(regexp, char):
  272. reg = Regexp(regexp, case_sensitive=True)
  273. assert_false(reg.can_contain(char),
  274. '{reg!r} should not contain {chr!r}'.format(reg=regexp, chr=char))
  275. reg = Regexp(regexp, case_sensitive=False)
  276. char = char.upper()
  277. assert_false(reg.can_contain(char),
  278. '{reg!r} (case insensitive) should not contain {chr!r}'.format(reg=regexp, chr=char))
  279. def check_positive_startswith(regexp, char, strict):
  280. reg = Regexp(regexp, case_sensitive=True, strict=strict)
  281. assert_true(reg.can_startswith(char),
  282. '{reg!r} can start\'s with {chr!r}'.format(reg=regexp, chr=char))
  283. reg = Regexp(regexp, case_sensitive=False, strict=strict)
  284. char = char.upper()
  285. assert_true(reg.can_startswith(char),
  286. '{reg!r} (case insensitive) can start\'s with {chr!r}'.format(reg=regexp, chr=char))
  287. def check_negative_startswith(regexp, char, strict):
  288. reg = Regexp(regexp, case_sensitive=True, strict=strict)
  289. assert_false(reg.can_startswith(char),
  290. '{reg!r} can\'t start\'s with {chr!r}'.format(reg=regexp, chr=char))
  291. reg = Regexp(regexp, case_sensitive=False, strict=strict)
  292. char = char.upper()
  293. assert_false(reg.can_startswith(char),
  294. '{reg!r} (case insensitive) can\'t start\'s with {chr!r}'.format(reg=regexp, chr=char))
  295. def check_groups_names(regexp, groups):
  296. reg = Regexp(regexp)
  297. assert_equals(set(reg.groups.keys()), set(groups))
  298. def check_to_string(regexp, string):
  299. reg = Regexp(regexp)
  300. assert_equals(str(reg), string)
  301. def check_positive_must_contain(regexp, char):
  302. reg = Regexp(regexp, case_sensitive=True)
  303. assert_true(reg.must_contain(char),
  304. '{reg!r} must contain with {chr!r}'.format(reg=regexp, chr=char))
  305. reg = Regexp(regexp, case_sensitive=False)
  306. char = char.upper()
  307. assert_true(reg.must_contain(char),
  308. '{reg!r} (case insensitive) must contain with {chr!r}'.format(reg=regexp, chr=char))
  309. def check_negative_must_contain(regexp, char):
  310. reg = Regexp(regexp, case_sensitive=True)
  311. assert_false(reg.must_contain(char),
  312. '{reg!r} must NOT contain with {chr!r}'.format(reg=regexp, chr=char))
  313. reg = Regexp(regexp, case_sensitive=False)
  314. char = char.upper()
  315. assert_false(reg.must_contain(char),
  316. '{reg!r} (case insensitive) must NOT contain with {chr!r}'.format(reg=regexp, chr=char))
  317. def check_positive_must_startswith(regexp, char, strict):
  318. reg = Regexp(regexp, case_sensitive=True, strict=strict)
  319. assert_true(reg.must_startswith(char),
  320. '{reg!r} MUST start\'s with {chr!r}'.format(reg=regexp, chr=char))
  321. reg = Regexp(regexp, case_sensitive=False, strict=strict)
  322. char = char.upper()
  323. assert_true(reg.must_startswith(char),
  324. '{reg!r} (case insensitive) MUST start\'s with {chr!r}'.format(reg=regexp, chr=char))
  325. def check_negative_must_startswith(regexp, char, strict):
  326. reg = Regexp(regexp, case_sensitive=True, strict=strict)
  327. assert_false(reg.must_startswith(char),
  328. '{reg!r} MUST NOT start\'s with {chr!r}'.format(reg=regexp, chr=char))
  329. reg = Regexp(regexp, case_sensitive=False, strict=strict)
  330. char = char.upper()
  331. assert_false(reg.must_startswith(char),
  332. '{reg!r} (case insensitive) MUST NOT start\'s with {chr!r}'.format(reg=regexp, chr=char))
  333. def check_generate(regexp, values):
  334. reg = Regexp(regexp)
  335. assert_equals(sorted(reg.generate('|', anchored=True)), sorted(values))